This package contains the data of bioenergetic features of 152 CLL samples and 9 B cell samples, measured by Seahorse extracellular flux assay. This package also reproduces figures presented in the paper “Energy metabolism influences drug sensitivity and is co-determined by genetic variants in CLL” by Lu J, Böttcher M et al.
In this vignette we present the integrative analysis of CLL bioenergetic data set and the source code for the paper
This vignette was put together by Junyan Lu
This vignette is build from the sub-vignettes, which each can be build separatelly. The parts are separated by the horizontal lines. Each part finishes with removal of all the created objects.
library(seahorseCLL)
library(BloodCancerMultiOmics2017)
library(SummarizedExperiment)
library(ggbeeswarm)
library(xtable)
library(cowplot)
library(piano)
library(gridExtra)
library(grid)
library(genefilter)
library(pheatmap)
library(robustbase)
library(DESeq2)
library(survival)
library(maxstat)
library(survminer)
library(glmnet)
library(RColorBrewer)
library(reshape2)
library(gtable)
library(Biobase)
library(tidyverse)
Load data
data("seaBcell", "seaOri", "patmeta")
Combine the two data sets to one matrix
stopifnot(rownames(seaBcell) == rownames(seaOri))
seaOri$diagnosis <- patmeta[colnames(seaOri),]$Diagnosis
seaOri <- seaOri[,seaOri$diagnosis == "CLL"] # choose CLL samples
seaMat <- cbind(assays(seaBcell)$seaMedian, assays(seaOri)$seaMedian)
seaBatch <- rbind(colData(seaBcell)[,"dateMST", drop= FALSE], colData(seaOri)[,"dateMST", drop = FALSE])
#remove samples that contain NA values
seaMat <- seaMat[,complete.cases(t(seaMat))]
Principal component analysis
resPC <- prcomp(t(seaMat), center = TRUE, scale. = TRUE)
varExp <- resPC$sdev^2/sum(resPC$sdev^2)
Plot the first two principal components
#define color
colorList <- c(`B cell` = "#FF3030", CLL= "#1E90FF")
plotTab <- data.frame(resPC$x[,c(1,2)])
plotTab$type <- c(rep("B cell", ncol(seaBcell)), rep("CLL", nrow(plotTab)- ncol(seaBcell))) #10 normal b cell samples
pcaPlot <- ggplot(plotTab, aes(x=PC1, y=PC2, color = type)) + geom_point(size=3) +
xlab(sprintf("PC1 (%2.1f%s)",varExp[1]*100,"%")) + ylab(sprintf("PC2 (%2.1f%s)", varExp[2]*100, "%")) +
theme_bw() + theme(legend.position = c(0.9,0.9), legend.title = element_blank(),
legend.background = element_rect(color="grey"),
axis.text = element_text(size =18),
axis.title = element_text(size = 20)) +
scale_color_manual(values = colorList) + coord_cartesian(xlim = c(-5.5,5.5), ylim = c(-5.5,5.5))
pcaPlot
Prepare table for hypothesis test
seaMat <- data.frame(cbind(assays(seaBcell)$seaMedian, assays(seaOri)$seaMedian)) %>% rownames_to_column(var = "measure")
seaTab <- gather(seaMat, key = "patientID", value = "value", -measure) %>%
mutate(type = ifelse(substr(patientID, 1, 1) == "K", "B cell", "CLL")) %>% mutate(type = factor(type)) %>%
mutate(batch = as.factor(seaBatch[patientID, ]))
t-test for each measurment
pTab <- group_by(seaTab, measure) %>% do((function(x) {
res <- t.test(value ~ type, x, equal.var = TRUE)
data.frame(p = res$p.value,
diff = res$estimate[[2]] - res$estimate[[1]])
}) (.))
pTab$p.adj <- p.adjust(pTab$p, method = "BH")
ANOVA-test for each measurment (accounting for batch effect)
pTab.aov <- group_by(seaTab, measure) %>% do((function(x) {
res <- summary(aov(value ~ type + batch, x))
data.frame(p = res[[1]][["Pr(>F)"]][1])
}) (.))
pTab.aov$p.adj <- p.adjust(pTab.aov$p, method = "BH")
Expor the table to LaTex format using xtable
expTab <- pTab %>% ungroup() %>% mutate(measure = gsub("\\."," ",measure)) %>%
rename("Seahorse measurement" = measure, "Difference of mean" = diff) %>%
mutate(p = pTab.aov$p, `adjusted p` = pTab.aov$p.adj) %>%
select(-p.adj)
expTab[expTab$`Seahorse measurement` == "ECAR OCR ratio", 1] <- "ECAR/OCR"
fileConn <- file("section01/tTest_BcellVSCLL.tex")
writeLines(print(xtable(expTab, digits = 3,
caption = "ANOVA test results of energy metabolic measurements between CLL cells and normal B cells "),
include.rownames=FALSE,
caption.placement = "top"), fileConn)
close(fileConn)
Beeswarms plot for select measurement
measureList <- c("basal.respiration","glycolysis","ATP.production","glycolytic.capacity","maximal.respiration","glycolytic.reserve")
gList <- lapply(measureList, function(seaName) {
plotTab <- filter(seaTab, measure == seaName, !is.na(value))
pval <- filter(pTab.aov, measure == seaName )$p
#unit y (add unit to y axis, based on the type of measurement)
if (seaName %in% c("basal.respiration","ATP.production","maximal.respiration")) {
yLab <- "OCR (pMol/min)"
} else yLab <- "ECAR (pMol/min)"
#replace the "." in the mearement name with space
seaName <- gsub("\\."," ",seaName)
#plot title
#plotTitle <- sprintf("p value = %s", format(pval, digits = 2, scientific = TRUE))
plotTitle <- paste(sprintf("'%s (p = '~",seaName),
sciPretty(pval, digits = 2),"*')'")
ggplot(plotTab, aes(x=type, y = value)) +
stat_boxplot(geom = "errorbar", width = 0.3) +
geom_boxplot(outlier.shape = NA, col="black", width=0.4) +
geom_beeswarm(cex=2, size =0.5, aes(col = type)) + theme_classic() +
xlab("") + ylab(yLab) + ggtitle(parse(text = plotTitle)) +
theme(axis.line.x = element_blank(), axis.ticks.x = element_blank(),
axis.title.y = element_text(size=10, face="bold"),
axis.text = element_text(size=11),
plot.title = element_text(size = 12, hjust=0.5),
legend.position = "none",
axis.text.x = element_text(face="bold",size=12)) +
scale_color_manual(values = colorList)
})
beePlot <- grid.arrange(grobs = gList, ncol=2)
Combine the PCA plot and beeswarm plots (Figure 1)
title = ggdraw() + draw_figure_label("Figure 1", fontface = "bold", position = "top.left",size=20)
p <- plot_grid(pcaPlot, beePlot, labels= c("A","B"), rel_widths = c(1,1), label_size = 20)
plot_grid(title, p, rel_heights = c(0.05,0.95), ncol = 1)
Load data
data("lpdAll", "seaOri","seaCombat", "patmeta", "mutCOM")
Subsetting
#overlap between the patient samples in seahorse dataset and main screen dataset
lpdCLL <- lpdAll[,pData(lpdAll)$Diagnosis %in% "CLL"]
seaMain <- intersect(colnames(seaOri),colnames(lpdCLL))
length(seaMain)
## [1] 140
#CLL seahorse data
seaSub <- t(assays(seaOri[,seaMain])$seaMedian)
Get patient genetic background
#extract genetic background information
genBack <- Biobase::exprs(lpdCLL)[fData(lpdCLL)$type %in%
c("gen","Methylation_Cluster","IGHV"), seaMain]
genBack <- genBack[! rownames(genBack) %in%
c("del13q14_bi","del13q14_mono"),]
genBack <- data.frame(t(genBack))
colnames(genBack) <- replace(colnames(genBack), colnames(genBack) == "del13q14_any", "del13q14")
Pre-processing genetic background matrix
#get genetic background information
geneSub <- genBack[seaMain,]
geneSub <- t(geneSub)
#filtering genetic background matrix.
#Each sample should have at least 10 records
#and each variant should be exist in at least 5 samples
geneSub <- geneSub[rowSums(!is.na(geneSub)) >= 10,]
geneSub <- geneSub[rowSums(geneSub >=1, na.rm = TRUE) >= 5, ]
seaTest <- t(seaSub)
Perform ANOVA-test, including batch as a co-variate.
#record missing values
sea.noNA <- !is.na(seaTest)
mut.noNA <- !is.na(geneSub)
#prepare matrix to store raw p-values
p.raw.mat <- mean.mat <- matrix(NA, nrow(seaTest),nrow(geneSub))
colnames(p.raw.mat) <- colnames(mean.mat) <- rownames(geneSub)
rownames(p.raw.mat) <- rownames(mean.mat) <- rownames(seaTest)
#calculate p-value for each measurement-variant combination.
for (i in 1:nrow(seaTest)) {
for (j in 1:nrow(geneSub)) {
com.noNA <- (sea.noNA[i,] & mut.noNA[j,])
genotype <- geneSub[j, com.noNA]
batch <- factor(colData(seaOri)[names(com.noNA[com.noNA]),]$dateMST)
if (length(genotype) >= 10 & sum(genotype) >= 5) {
seaGen <- seaTest[i,com.noNA]
genotype <- as.factor(genotype)
res <- summary(aov(seaGen ~ genotype + batch))
p.raw.mat[i,j] <- res[[1]][5][1,]
mean.mat[i,j] <- mean(seaGen[genotype==1]) - mean(seaGen[genotype==0])
}
}
}
Multiple hypothesis testing
#processing the p-value and multi-hypothesis correlations.
p.tab <- melt(p.raw.mat)
colnames(p.tab) <- c('Measurement','Variant','P.raw')
p.tab$FC <- melt(mean.mat)$value
p.tab$Measurement <- as.character(p.tab$Measurement)
p.tab$Variant <- as.character(p.tab$Variant)
p.tab <- filter(p.tab, !is.na(P.raw)) #remove NAs
p.tab$P.adj <- p.adjust(p.tab$P.raw, method = "BH")
#select and show the significant one (according to the raw p value)
p.tab.sig <- p.tab[p.tab$P.raw <= 0.05,]
p.tab.sig <- p.tab.sig[order(p.tab.sig$Measurement),]
hist(p.tab$P.raw, breaks=20, col= "lightgreen",
main = "Seahorse VS genetics", xlab = "raw P values")
Create a table show significant associations (for supplementary table)
#pCut <- 0.01
tabOut <- filter(p.tab, P.adj <= 0.1) %>% mutate(Measurement = formatSea(Measurement)) %>%
mutate(Variant = ifelse(Variant == "IGHV.Uppsala.U.M", "IGHV status", Variant)) %>%
select(Measurement, Variant, P.raw, FC, P.adj)
colnames(tabOut) <- c("Seahorse measurment", "Genetic variant", "p value", "Difference of mean", "adjusted p value")
write(print(xtable(tabOut, digits = 3,
caption = "ANOVA test results (adjusted for batch effect) of energy metabolic measurements related to different genetic variants"),
include.rownames=FALSE,
caption.placement = "top"), file = "section02/tTest_SeahorseVSgene.tex")
Scatter plot of p values
#prepare table for plot
p.tab[p.tab$P.adj >= 0.05,]$Variant <- "not significant"
#prepare table for plot
plotTab <- mutate(p.tab, Variant = ifelse(Variant == "IGHV.Uppsala.U.M", "IGHV status", Variant))
plotTab$Measurement <- sapply(plotTab$Measurement, function(x) {gsub("\\."," ",x)})
#define color
colList <- c("#c0508a", "#79c858", "#7e45b9", "#c0ad52",
"#8c8bbd", "#c35c41", "#86bca8", "#4b3e3a","grey80")
varName <- sort(as.character(unique(plotTab$Variant)))
varName <- c(varName[varName != "not significant"],"not significant")
names(colList) <- varName
#define legend order
plotTab$Variant <- factor(plotTab$Variant, levels = rev(names(colList)))
#fdr cut-off
fdrCut <- max(filter(plotTab, Variant != "not significant")$P.raw)
p <- ggplot(data=plotTab, aes(x= Measurement, y=-log10(P.raw),col=Variant))+
geom_jitter(size=3, width = 0.15) +
geom_hline(yintercept = -log10(fdrCut), linetype="dotted") +
ylab(expression(-log[10]*'('*p~value*')')) + xlab("Seahore measurements") +
theme_bw() + scale_color_manual(values = colList) +
theme(axis.text.x = element_text(vjust=0.5, hjust = 1,size=15),
axis.text.y = element_text(size=15),
axis.title = element_text(size =18)) +
annotate(geom = "text", x = 0.7, y = -log10(fdrCut) - 0.5, label = "5% FDR") +
guides(color=guide_legend(title="Genetic variant")) +
coord_flip()
plot(p)
Plot all significant associations as beeswarm plot
p.tab.sig <- p.tab[p.tab$Variant != "not significant",]
#batch effect corrected values should be used for plot
seaPlot <- assays(seaCombat)$seaMedian
seaPlot <- seaPlot[rownames(seaTest), colnames(seaTest)]
gList <- lapply(seq(1,nrow(p.tab.sig)), function(i) {
seaName <- as.character(p.tab.sig[i,1])
geneName <- as.character(p.tab.sig[i,2])
pval <- p.tab.sig[i,3]
plotTab <- tibble(Measurement = seaPlot[seaName,], Mutation = geneSub[geneName,]) %>%
filter(!is.na(Measurement), !is.na(Mutation))
#for IGHV
if (geneName == "IGHV.Uppsala.U.M") {
geneName <- "IGHV status"
plotTab <- mutate(plotTab,
Status = ifelse(Mutation == 0,
sprintf("unmutated (n=%s)", sum(Mutation == 0)),
sprintf("mutated (n=%s)", sum(Mutation == 1))))
} else if (geneName == "Methylation_Cluster") {
plotTab <- mutate(plotTab,
Status = ifelse(Mutation == 0,sprintf("LP (n=%s)", sum(Mutation == 0)),
ifelse(Mutation == 1, sprintf("IP (n=%s)", sum(Mutation == 1)),
sprintf("HP (n=%s)", sum(Mutation == 2)))))
} else plotTab <- mutate(plotTab,
Status = ifelse(Mutation == 0,
sprintf("wild type (n=%s)", sum(Mutation == 0)),
sprintf("mutated (n=%s)", sum(Mutation == 1))))
#reverse label factor (wildtype always on the rigt)
plotTab <- mutate(plotTab, Status = factor(Status)) %>%
mutate(Status = factor(Status, levels = rev(levels(Status))))
# color scheme, black for wildtype, red for mutated
if (length(levels(plotTab$Status)) == 2) {
colorList <- c("black","red")
names(colorList) <- levels(plotTab$Status)
} else {
colorList <- c("lightblue","blue", "darkblue")
names(colorList) <- levels(plotTab$Status)
}
#set y label
if (seaName %in% c("ECAR.OCR.ration")) {
yLab = "ECAR/OCR"
} else if (seaName %in% c("maximal.respiration","spare.respiratory.capacity","basal.respiration",
"ATP.production","OCR","proton.leak")) {
yLab = "OCR (pMol/min)" } else yLab = "ECAR (pMol/min)"
#replace the "." in the mearement name with space
seaName <- gsub("\\."," ",seaName)
#plot title
plotTitle <- paste(sprintf("'%s ~ %s (p = '~",seaName,geneName),
sciPretty(pval, digits = 2),"*')'")
ggplot(plotTab, aes(x=Status, y = Measurement)) +
stat_boxplot(geom = "errorbar", width = 0.3) +
geom_boxplot(outlier.shape = NA, col="black", width=0.4) +
geom_beeswarm(cex=2, size =1, aes(col = Status)) + theme_classic() +
xlab("") + ylab(yLab) + ggtitle(parse(text=plotTitle)) +
scale_color_manual(values = colorList) +
theme(axis.line.x = element_blank(), axis.ticks.x = element_blank(),
axis.title = element_text(size=18, face="bold"),
axis.text = element_text(size=18),
plot.title = element_text(hjust=0.5,size=15),
legend.position = "none",
axis.title.x = element_text(face="bold"))
})
Combine p value scatter plot and beeswarm plots (Figure 2)
title = ggdraw() + draw_figure_label("Figure 2", fontface = "bold", position = "top.left",size=20)
p <- ggdraw() +
draw_plot(p, 0, 0, 0.6, 1) +
draw_plot(gList[[13]], 0.6, 0 , .4, .48) +
draw_plot(gList[[15]], 0.6, 0.5 , .4, .48) +
draw_plot_label(c("A", "B", "C"),
c(0, 0.58, 0.58), c(1, 1, 0.5), size = 20)
plot_grid(title, p, rel_heights = c(0.05,0.95), ncol = 1)
Plot the rest in a separate pdf file (For supplementary figure)
gList[c(13,15)] <- NULL
grid.arrange(grobs = gList, ncol=2)
Load data set
data("lpdAll","drugs", "patmeta", "seaCombat", "conctab")
Sample subsetting
#get drug response data for CLL samples only
lpdCLL <- lpdAll[fData(lpdAll)$type == "viab",pData(lpdAll)$Diagnosis == "CLL"]
#get overlapped samples
sampleOverlap <- intersect(colnames(lpdCLL), colnames(seaCombat))
seaSub <- seaCombat[,sampleOverlap]
lpdCLL <- lpdCLL[,sampleOverlap]
How many overlapped samples?
length(sampleOverlap)
## [1] 140
Remove bad drugs. Bortezomib lost its activity during storage. The data for this drug and NSC 74859 were discarded from further analysis.
badrugs = c("D_008", "D_025")
lpdCLL <- lpdCLL[!fData(lpdCLL)$id %in% badrugs,]
Get drug response data
# get drug responsee data
get.drugresp <- function(lpd) {
drugresp = t(Biobase::exprs(lpd[fData(lpd)$type == 'viab'])) %>%
tbl_df %>% dplyr::select(-ends_with(":5")) %>%
dplyr::mutate(ID = colnames(lpd)) %>%
tidyr::gather(drugconc, viab, -ID) %>%
dplyr::mutate(drug = drugs[substring(drugconc, 1, 5), "name"],
conc = sub("^D_([0-9]+_)", "", drugconc)) %>%
dplyr::mutate(conc = as.integer(gsub("D_CHK_", "", conc)))
drugresp
}
drugresp <- get.drugresp(lpdCLL)
Use median polish to summarise drug response of the five concentrations
get.medp <- function(drugresp) {
tab = drugresp %>% group_by(drug, conc) %>%
do(data.frame(v = .$viab, ID = .$ID)) %>% spread(ID, v)
med.p = lapply(unique(tab$drug), function(n) {
tb = filter(tab, drug == n) %>% ungroup() %>% select(-(drug:conc)) %>%
as.matrix %>% `rownames<-`(1:5)
mdp = stats::medpolish(tb, trace.iter = FALSE)
df = as.data.frame(mdp$col) + mdp$overall
colnames(df) <- n
df
}) %>% do.call(cbind,.)
medp.viab = tbl_df(med.p) %>% mutate(ID = rownames(med.p)) %>%
gather(drug, viab, -ID)
medp.viab
}
drugresp.mp <- get.medp(drugresp)
corTest <- function(patID, viab, seaTable, ighv = NULL) {
viab <- setNames(viab, patID)
corTab <- lapply(seq(1,nrow(seaTable)), function(i) {
seaName <- rownames(seaTable)[i]
#remove NA samples in Seahorse entry
seaVal <- seaTable[seaName,]
seaVal <- seaVal[!is.na(seaVal)]
#get useable sample list
if (!is.null(ighv)) {
patList <- intersect(names(ighv), intersect(patID, names(seaVal)))
ighvVal <- ighv[patList]
} else {
patList <- intersect(patID, names(seaVal))
}
#subset drug value
drugVal <- viab[patList]
seaVal <- seaVal[patList]
#correlation test, block for IGHV
if (!is.null(ighv)) {
res <- summary(lm(seaVal ~ drugVal + ighvVal))
data.frame(seahorse = seaName,
p = res$coefficients[2,4],
coef = sqrt(res$r.squared) * sign(res$coefficients[2,3]),
stringsAsFactors = FALSE)
} else {
res <- cor.test(seaVal, drugVal, method = "pearson")
data.frame(seahorse = seaName,
p = res$p.value,
coef = res$estimate[[1]],
stringsAsFactors = FALSE)
}
}) %>% do.call(rbind,.)
}
Calculate correlation coefficient and p values
seaTest <- assays(seaSub)$seaMedian
resTab.noBlock <- group_by(drugresp.mp, drug) %>% do(corTest(.$ID, .$viab, seaTest, ighv = NULL)) %>% ungroup() %>%
mutate(p.adj = p.adjust(p, method = "BH"))
How many significant associations at 10% FDR?
resTab.noBlock %>% filter(p.adj <= 0.1) %>% nrow()
## [1] 118
How many drugs show at least one significant assocations?
resTab.noBlock %>% filter(p.adj <= 0.1) %>% filter(!duplicated(drug)) %>% nrow()
## [1] 32
Calculate correlation coefficient and p values
#get IGHV stauts
ighv <- Biobase::exprs(lpdAll)["IGHV Uppsala U/M",]
ighv <-ighv[!is.na(ighv)]
resTab <- group_by(drugresp.mp, drug) %>% do(corTest(.$ID, .$viab, seaTest, ighv = ighv)) %>% ungroup() %>%
mutate(p.adj = p.adjust(p, method = "BH"))
How many significant associations at 10% FDR?
resTab %>% filter(p.adj <= 0.1) %>% nrow()
## [1] 45
How many drugs show at least one significant assocations?
resTab %>% filter(p.adj <= 0.1) %>% filter(!duplicated(drug)) %>% nrow()
## [1] 19
ggplot(resTab.noBlock, aes(x=coef)) + geom_histogram(binwidth = 0.05, col = "darkblue", fill = "lightblue") + theme_classic() +
theme(panel.grid = element_blank(), axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size=12), plot.title = element_text(size=15, hjust =.5, face = "bold")) + xlab("Pearson correlation coefficient")
Preocess table for plotting
atLeastOne <- group_by(resTab, drug) %>% summarise(sigNum = sum(p.adj <= 0.1)) %>% filter(sigNum > 0)
plotTab <- filter(resTab, drug %in% atLeastOne$drug) %>%
mutate(seahorse = ifelse(p.adj > 0.1, "not significant", seahorse))
#change mearement name
plotTab$seahorse <- sapply(plotTab$seahorse, function(x) {gsub("\\."," ",x)})
#define the group of seahorse measurement
measureType <- tibble(measure = rownames(seaCombat), type = rowData(seaCombat)$type) %>%
mutate(type = ifelse(type %in% c("ECAR.OCR","GST","ECAR"), "glycolysis", "respiration"))
#generate color list separately for each group
glyList <- setNames(tail(brewer.pal(9,"OrRd"),nrow(filter(measureType, type =="glycolysis"))),
filter(measureType, type =="glycolysis")$measure)
resList <- setNames(tail(brewer.pal(9,"GnBu"),nrow(filter(measureType, type =="respiration"))),
filter(measureType, type =="respiration")$measure)
nosig <- c("not significant" = "grey80")
colList <- c(glyList, resList, nosig)
names(colList) <- sapply(names(colList), function(x) {gsub("\\."," ",x)})
#order the factor for seahorse measurment
plotTab$seahorse <- factor(plotTab$seahorse, levels = names(colList))
#add the direction of correlation
plotTab <- mutate(plotTab, Direction = ifelse(coef > 0, "positive", "negative"))
#get the cutoff value
fdrCut <- max(filter(plotTab, seahorse != "not significant")$p)
Plot
p <- ggplot(data = plotTab, aes(x = drug,y=-log10(p), fill = seahorse, color = seahorse, shape = Direction)) +
geom_point(size=4) + scale_fill_manual(values = colList) + scale_color_manual(values = colList) +
geom_hline(yintercept = -log10(fdrCut), linetype="dotted") +
ylab(expression(-log[10]*'('*p~value*')')) + xlab("") + theme_bw() +
scale_shape_manual(values = c(positive = 24, negative = 25)) +
theme(axis.text.x = element_text(angle = 90, vjust=0.5, hjust = 1,size=15),
axis.text.y = element_text(size=15),
axis.title = element_text(size =15, face = "bold")) +
guides(fill="none", color = guide_legend(title = "Measurement"))
plot(p)
Preocess table for plotting
atLeastOne <- group_by(resTab.noBlock, drug) %>% summarise(sigNum = sum(p.adj <= 0.1)) %>% filter(sigNum > 0)
plotTab <- filter(resTab.noBlock, drug %in% atLeastOne$drug) %>%
mutate(seahorse = ifelse(p.adj > 0.1, "not significant", seahorse))
#change mearement name
plotTab$seahorse <- sapply(plotTab$seahorse, function(x) {gsub("\\."," ",x)})
#define the group of seahorse measurement
measureType <- tibble(measure = rownames(seaCombat), type = rowData(seaCombat)$type) %>%
mutate(type = ifelse(type %in% c("ECAR.OCR","GST","ECAR"), "glycolysis", "respiration"))
#generate color list separately for each group
glyList <- setNames(tail(brewer.pal(9,"OrRd"),nrow(filter(measureType, type =="glycolysis"))),
filter(measureType, type =="glycolysis")$measure)
resList <- setNames(tail(brewer.pal(9,"GnBu"),nrow(filter(measureType, type =="respiration"))),
filter(measureType, type =="respiration")$measure)
nosig <- c("not significant" = "grey80")
colList <- c(glyList, resList, nosig)
names(colList) <- sapply(names(colList), function(x) {gsub("\\."," ",x)})
#order the factor for seahorse measurment
plotTab$seahorse <- factor(plotTab$seahorse, levels = names(colList))
#add direction iformation
plotTab <- mutate(plotTab, Direction = ifelse(coef > 0, "positive", "negative"))
#get the cutoff value
fdrCut <- max(filter(plotTab, seahorse != "not significant")$p)
p <- ggplot(data = plotTab, aes(x = drug,y=-log10(p), fill = seahorse, color = seahorse, shape = Direction)) +
geom_point(size=4) + scale_fill_manual(values = colList) + scale_color_manual(values = colList) +
geom_hline(yintercept = -log10(fdrCut), linetype="dotted") +
ylab(expression(-log[10]*'('*p~value*')')) + xlab("") + theme_bw() +
scale_shape_manual(values = c(positive = 24, negative = 25)) +
ggtitle("Figure 4") +
theme(axis.text.x = element_text(angle = 90, vjust=0.5, hjust = 1,size=15),
axis.text.y = element_text(size=15),
axis.title = element_text(size =15, face = "bold"),
plot.title = element_text(size=20, face = "bold")) +
guides(fill="none", color = guide_legend(title = "Measurement"))
plot(p)
Scatter plot for all significant pairs
resTab.sig <- filter(resTab, p.adj <= 0.1)
scatterList <- lapply(seq(1,nrow(resTab.sig)), function(i){
seaName <- resTab.sig[i,]$seahorse
p <- resTab.sig[i,]$p
coef <- format(resTab.sig[i,]$coef,digits = 2)
drugName <- resTab.sig[i,]$drug
#remove NA samples in Seahorse entry
seaVal <- seaTest[seaName,]
seaVal <- seaVal[!is.na(seaVal)]
#get useable sample list
patList <- intersect(filter(drugresp.mp, drug == drugName)$ID, names(seaVal))
#set y label
if (seaName %in% c("ECAR.OCR.ration")) {
xLab = "ECAR/OCR"
} else if (seaName %in% c("maximal.respiration","spare.respiratory.capacity","basal.respiration",
"ATP.production")) {
xLab = "OCR (pMol/min)" } else xLab = "ECAR (pMol/min)"
#format seahorse measurement name
seaName.new <- ifelse(seaName == "ECAR.OCR.ratio", "ECAR/OCR", gsub("\\."," ",seaName))
#prepare title
plotTitle <- sprintf("%s ~ %s", drugName, seaName.new)
#prepare plot table
plotTab <- filter(drugresp.mp, drug == drugName, ID %in% patList) %>%
mutate(sea=seaVal[ID])
#prepare correlation test annotations
annoText <- paste("'coefficient ='~",coef,"*","', p ='~",sciPretty(p,digits=2))
limX <- max(plotTab$sea) + 2
midX <- max(plotTab$sea)/2
ggplot(plotTab, aes(x=sea,y=100*viab)) + geom_point(size=1) +
geom_smooth(method = "lmrob", se= FALSE) +
xlab(xLab) + ylab("% viability after drug treatment") +
theme_bw() + ggtitle(plotTitle) + coord_cartesian(xlim = c(-2,limX)) +
annotate("text", x = midX, y = Inf, label = annoText,
vjust=1, hjust=0.5, size = 5, parse = TRUE, col= "darkred") +
theme(panel.grid = element_blank(),
axis.title = element_text(size=13, face="bold"),
axis.text = element_text(size=12),
plot.title = element_text(face="bold", hjust=0.5, size = 15 ),
legend.position = "none",
axis.title.x = element_text(face="bold"),
plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))
})
names(scatterList) <- paste0(resTab.sig$seahorse, "_", resTab.sig$drug)
A figure of selected drugs (Figure 5)
title = ggdraw() + draw_figure_label("Figure 5", fontface = "bold", position = "top.left",size=20)
p <- plot_grid(scatterList$glycolysis_rotenone,
scatterList$glycolysis_venetoclax,
scatterList$glycolytic.capacity_orlistat,
scatterList$`ECAR_KX2-391`,ncol=2)
plot_grid(title, p, rel_heights = c(0.05,0.95), ncol = 1)
Association tests for each concentration
corRes_conc <- group_by(drugresp, drug, conc) %>% do(corTest(.$ID, .$viab, seaTest, ighv = NULL)) %>% ungroup() %>%
mutate(p.adj = p.adjust(p, method = "BH"))
Prepare plot tab
drugList <- unique(filter(corRes_conc, p.adj < 0.1)$drug)
seaList <- unique(filter(corRes_conc, p.adj < 0.1)$seahorse)
plotTab <- filter(corRes_conc, drug %in% drugList,
seahorse %in% seaList) %>% mutate(concIndex = paste0("c",conc)) %>%
mutate(coef = ifelse(p.adj < 0.1, coef, 0),
seahorse = ifelse(seahorse != "ECAR.OCR.ratio", seahorse,
"ECAR/OCR")) %>%
mutate(seahorse = gsub("[.]","\n",seahorse)) %>%
mutate(seahorse = factor(seahorse))
#plot similar seahorse measurment together
plotTab$seahorse <- factor(plotTab$seahorse,
levels = levels(plotTab$seahorse)[c(3,4,5,6,7,
9,1,2,8,11,10)])
Heatmap plot for p values (Supplementary Figure)
ggplot(plotTab, aes(x=concIndex, y = drug, fill = coef)) + geom_tile(size = 0.3, color = "white") + facet_wrap(~ seahorse, nrow = 1) +
scale_fill_gradient2(low = "red", mid = "white", high = "blue", midpoint = 0,
limits =c(-0.6,0.6), labels = seq(-0.8,0.8, by = 0.2),
breaks = seq(-0.8,0.8, by = 0.2),
name = "Coefficient") +
theme_bw() + theme(strip.text = element_text(face = "bold"),
axis.text.y = element_text(size =12)) +
ylab("Drug name") + xlab("Concentration Index")
Get the drug response data of rotenone and orlistat
lpdCLL <- lpdAll[fData(lpdAll)$type == "viab",pData(lpdAll)$Diagnosis == "CLL"]
lpdSub <- lpdCLL[fData(lpdCLL)$name %in% c("rotenone","orlistat") &
! fData(lpdCLL)$subtype %in% c("4:5","1:5"),]
fData(lpdSub)$conc <- sapply(seq(1,nrow(lpdSub)), function(i) {
conctab[fData(lpdSub)[i,]$id, paste0("c",fData(lpdSub)[i,]$subtype)]
})
viabTab <- data.frame(Biobase::exprs(lpdSub)) %>% rownames_to_column("drugID") %>%
gather(key = "patID", value = "viab", -drugID) %>% as_tibble() %>%
mutate(conc = fData(lpdSub)[drugID,]$conc,
name = fData(lpdSub)[drugID,]$name) %>%
select(-drugID) %>% spread(key = name, value = viab)
Perform correlation test
testTab <- viabTab %>% group_by(conc) %>%
do(data.frame(p = cor.test(.$rotenone, .$orlistat)$p.value,
coef = cor(.$rotenone, .$orlistat)))
Plot the correlations (Supplementary Figure)
pList <- lapply(seq(nrow(testTab)), function(i) {
p = testTab[i,]$p
coef = format(testTab[i,]$coef,digits = 2)
conc1 = testTab[i,]$conc
plotTab <- filter(viabTab, conc == conc1)
annoText <- paste("'coefficient ='~",coef,"*","', p ='~",sciPretty(p,digits=2))
ggplot(plotTab, aes(x=100*rotenone, y = 100*orlistat)) + geom_point() + geom_smooth(method = "lmrob", se = FALSE) +
coord_cartesian(xlim = c(0,120), ylim = c(0,120)) +
ggtitle(bquote("Concentration ="~.(conc1)~mu*"M")) +
annotate("text", x = 0, y = 123, label = annoText, vjust=1, hjust=0, size =4 , color = "darkred",parse = TRUE) +
xlab("rotenone (% viability)") + ylab("orlistat (% viability)") +
theme_bw() +
theme(panel.grid = element_blank(),
axis.title = element_text(size=15, face="bold"),
axis.text = element_text(size=10),
plot.title = element_text(face="bold", hjust=0.5, size = 15 ),
legend.position = "none",
axis.title.x = element_text(face="bold"),
plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))
})
grid.arrange(grobs= pList, ncol =2)
Load data set
data("dds", "patmeta","mutCOM","seaOri")
Only use CLL samples with IGHV annotations and have been used in seahorse experiments
#annotate IGHV status
dds$IGHV <- patmeta[match(dds$PatID, rownames(patmeta)),]$IGHV
dds$diag <- patmeta[match(dds$PatID, rownames(patmeta)),]$Diagnosis
#estimate size factor
dds <- estimateSizeFactors(dds)
#only choose CLL samples with IGHV annotations and have been used in seahorse experiments
ddsCLL <- dds[,dds$diag == "CLL" & !is.na(dds$IGHV) & dds$PatID %in% colnames(seaOri)]
Filter genes
#remove genes without gene symbol annotations
ddsCLL <- ddsCLL[! rowData(ddsCLL)$symbol %in% c(NA,""),]
ddsCLL <- ddsCLL[rowData(ddsCLL)$chromosome != "Y",]
#only keep genes that have counts higher than 10 in any sample
keep <- apply(counts(ddsCLL), 1, function(x) any(x >= 10))
ddsCLL <- ddsCLL[keep,]
# Remove transcripts which do not show variance across samples.
sds <- rowSds(counts(ddsCLL, normalized = TRUE))
sh <- shorth(sds)
ddsCLL <- ddsCLL[sds >= sh,]
#how many genes do we have
nrow(ddsCLL)
## [1] 13744
Variance stabilizing tranformation
ddsCLL.norm <- varianceStabilizingTransformation(ddsCLL)
Differential expression using DESeq2
ddsCLL$IGHV <- factor(ddsCLL$IGHV, levels = c("U", "M"))
design(ddsCLL) <- ~ IGHV
ddsCLL <- DESeq(ddsCLL, betaPrior = TRUE)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 720 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
Get differential expression result
DEres <- as.tibble(results(ddsCLL, tidy = TRUE)) %>% mutate(symbol = rowData(ddsCLL)$symbol)
Function for converting DEseq results to enrichment analysis input
createInput <- function(DEres, pCut = 0.05, ifFDR = FALSE, rankBy = "stat") {
if (ifFDR) {
inputTab <- filter(DEres, padj <= pCut)
} else {
inputTab <- filter(DEres, pvalue <= pCut)
}
inputTab <- arrange(inputTab, pvalue) %>% filter(!duplicated(symbol)) %>% select_("symbol", rankBy) %>% data.frame(stringsAsFactors = FALSE)
rownames(inputTab) <- inputTab$symbol
inputTab$symbol <- NULL
colnames(inputTab) <- "stat"
return(inputTab)
}
load genesets
gmts = list(H=system.file("extdata","h.all.v5.1.symbols.gmt", package="BloodCancerMultiOmics2017"),
C6=system.file("extdata","c6.all.v5.1.symbols.gmt", package="BloodCancerMultiOmics2017"),
KEGG=system.file("extdata","c2.cp.kegg.v5.1.symbols.gmt", package = "BloodCancerMultiOmics2017"))
Enrichment analysis using Hallmarks gene set
enRes <- list()
inputTab <- createInput(DEres, pCut = 0.1, ifFDR = TRUE)
enRes[["Gene enrichment analysis"]] <- runGSEA(inputTab = inputTab, gmtFile = gmts$H, GSAmethod = "page")
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
#remove the HALLMARK_
enRes$`Gene enrichment analysis`$Name <- gsub("HALLMARK_","", enRes$`Gene enrichment analysis`$Name)
Plot hallmark result
enBar <- plotEnrichmentBar(enRes, pCut = 0.1, ifFDR = TRUE, setName = "Hallmark gene sets")
plot(enBar)
Prepare the data for heatmap
# load genes in the gene set
gsc <- loadGSC(gmts$H)
geneList <- gsc$gsc$HALLMARK_GLYCOLYSIS
#select differentially expressed genes
fdrCut <- 0.10
sigDE <- filter(DEres, padj <= fdrCut, log2FoldChange < 0) %>% filter(symbol %in% geneList) %>%
arrange(log2FoldChange)
#get the expression matrix
plotMat <- assay(ddsCLL.norm[sigDE$row,])
colnames(plotMat) <- ddsCLL.norm$PatID
rownames(plotMat) <- sigDE$symbol
#sort columns of plot matrix based on trisomy12 status
plotMat <- plotMat[,order(ddsCLL$IGHV)]
#calculate z-score and sensor
plotMat <- t(scale(t(plotMat)))
plotMat[plotMat >= 4] <- 4
plotMat[plotMat <= -4] <- -4
annoCol <- data.frame(row.names = ddsCLL.norm$PatID, `IGHV` = ddsCLL.norm$IGHV)
Plot the heatmap
#color for colum annotation
annoColor <- list(IGHV = c(M = "red", U = "grey80"))
hallHeatmap <- pheatmap(plotMat, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100),
cluster_cols = FALSE, cluster_rows = FALSE,
annotation_col = annoCol, annotation_colors = annoColor,
show_colnames = FALSE, fontsize_row = 8, breaks = seq(-5,5, length.out = 101), treeheight_row = 0,
border_color = NA, main = "HALLMARK_GLYCOLYSIS",silent = TRUE)$gtable
grid.draw(hallHeatmap)
plotGenes <- c("PFKP","PGAM1","PGK1")
plotTab <- data.frame(assay(ddsCLL.norm[rowData(ddsCLL.norm)$symbol %in% plotGenes,]))
colnames(plotTab) <- ddsCLL.norm$PatID
plotTab$symbol <- rowData(ddsCLL.norm[rowData(ddsCLL.norm)$symbol %in% plotGenes,])$symbol
plotTab <- gather(plotTab, key = "PatID", value = "value", -symbol) %>%
mutate(IGHV = colData(ddsCLL.norm)[match(PatID, ddsCLL.norm$PatID),]$IGHV)
beePlot <- ggplot(plotTab, aes(x=IGHV, y = value)) +
stat_boxplot(geom = "errorbar", width = 0.3) +
geom_boxplot(outlier.shape = NA, col="black", width=0.4) +
geom_beeswarm(cex=2, size =0.5, aes(col = IGHV)) + theme_classic() +
xlab("") + ylab("normalized expression") +
scale_color_manual(values = c("M" = "red","U" = "grey30")) +
theme(axis.line.x = element_blank(), axis.ticks.x = element_blank(),
axis.title = element_text(size=12, face="bold"),
axis.text = element_text(size=12),
plot.title = element_text(face="bold", hjust=0.5),
legend.position = "none",
axis.title.x = element_text(face="bold"),
strip.background = element_blank(),
strip.text = element_text(size=15, face = "bold")) +
facet_wrap(~ symbol, scales = "free")
title = ggdraw() + draw_figure_label("Figure 3", fontface = "bold", position = "top.left",size=20)
p <- ggdraw() +
draw_plot(enBar, 0, 0.5, 0.5, 0.45) +
draw_plot(hallHeatmap, 0.5, 0, 0.5, 0.95) +
draw_plot(beePlot, 0, 0, 0.5, 0.4) +
draw_plot_label(c("A","B","C"), c(0, 0.5, 0), c(1, 1, 0.45), fontface = "plain", size=20)
plot_grid(title, p, rel_heights = c(0.05,0.95), ncol = 1)
Load data set
data("patmeta", "seaCombat","lpdAll", "pretreat","doublingTime")
Prepare data table for t-test
testTab <- assays(seaCombat)$seaMedian %>% data.frame() %>% rownames_to_column("Measurement") %>%
gather(key = "patID", value = "value", -Measurement) %>%
mutate(pretreated = pretreat[patID,]) %>%
mutate(pretreated = factor(pretreated),
IGHV = factor(Biobase::exprs(lpdAll)["IGHV Uppsala U/M",patID])) %>%
as.tibble()
Performing t-test for each measurement
tTest <- function(x, y) {
noNA <- !is.na(x) & !is.na(y)
res <- t.test(x[noNA] ~ y[noNA], equal.var = TRUE)
tibble(p = res$p.value,
dm = res$estimate[[2]] - res$estimate[[1]])
}
pTab <- group_by(testTab, Measurement) %>% do(tTest(.$value, .$pretreated)) %>% ungroup() %>%
mutate(p.adj = p.adjust(p, method = "BH"))
Perform ANOVA test, including IGHV as a blocking factor
aovTest <- function(x,y,block) {
noNA <- !is.na(x) & !is.na(y) & !is.na(block)
dataTab <- data.frame(x = x[noNA], y = y[noNA], block = block[noNA])
res <- anova(lm(x ~ y + block))
tibble(p = res$`Pr(>F)`[1])
}
pTab.IGHV <- group_by(testTab, Measurement) %>% do(aovTest(.$value, .$pretreated, .$IGHV)) %>%
ungroup() %>% mutate(p.adj = p.adjust(p, method = "BH"))
A table as output
outTable <- tibble(`Seahorse mearuement` = formatSea(pTab$Measurement),
`p value` = format(pTab$p, digits = 2),
`adjusted p` = format(pTab$p.adj, digits = 3),
`p value (IGHV blocked)` = format(pTab.IGHV$p, digits = 2),
`adjusted p (IGHV blocked)` = format(pTab.IGHV$p.adj, digits = 3))
write(
print(xtable(outTable, digits = 3,
caption = "Student's t-test and ANOVA test (IGHV blocked) results of energy metabolic measurements related to pretreatment status"),
include.rownames=FALSE,
caption.placement = "top")
,file = "section05/tTest_SeahorseVSpretreat.tex")
Association between IGHV status and pretreatment
IGHVtab <- filter(testTab, !duplicated(patID))
chiRes <- chisq.test(IGHVtab$pretreated, IGHVtab$IGHV)
chiRes
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: IGHVtab$pretreated and IGHVtab$IGHV
## X-squared = 18.157, df = 1, p-value = 2.034e-05
Plot
seaList <- c("maximal.respiration", "spare.respiratory.capacity")
plotList <- lapply(seaList, function(seaName) {
#prepare table for plotting
plotTab <- filter(testTab, Measurement == seaName) %>%
mutate(Measurement = formatSea(Measurement),
pretreated = ifelse(pretreated ==1,
sprintf("yes (n=%s)", sum(pretreated == 1)),
sprintf("no (n=%s)", sum(pretreated == 0))),
IGHV = ifelse(is.na(IGHV), "unknown",
ifelse(IGHV == 1, "mutated", "unmutated"))) %>%
mutate(IGHV = factor(IGHV, levels = c("mutated","unmutated","unknown"))) %>%
filter(!is.na(value))
#p value for annotation
pval <- filter(pTab, Measurement == seaName)$p
#color for IGHV status
colorList <- c(mutated = "red", unmutated = "black", unknown = "grey80")
#formatted seaname
seaTitle <- unique(plotTab$Measurement)
#title
plotTitle <- paste(sprintf("'%s (p = '~",seaTitle),
sciPretty(pval, digits = 2),"*')'")
ggplot(plotTab, aes(x=pretreated, y = value)) +
stat_boxplot(geom = "errorbar", width = 0.3) +
geom_boxplot(outlier.shape = NA, col="black", width=0.4) +
geom_beeswarm(cex=2, size =1, aes(col = IGHV)) + theme_classic() +
xlab("") + ylab("OCR (pMol/min)") + ggtitle(parse(text=plotTitle)) +
scale_color_manual(values = colorList) +
theme(axis.line.x = element_blank(), axis.ticks.x = element_blank(),
axis.title = element_text(size=12, face="bold"),
axis.text = element_text(size=12),
plot.title = element_text(hjust=0.5,size=15),
axis.title.x = element_text(face="bold"))
})
Save the plot
grid.arrange(grobs = plotList, ncol =2)
Pre-processing data
testTab <- assays(seaCombat)$seaMedian %>% data.frame() %>% rownames_to_column("Measurement") %>%
gather(key = "patID", value = "value", -Measurement) %>%
mutate(pretreated = pretreat[patID,],
doubling.time = LDT[patID,]) %>%
mutate(pretreated = factor(pretreated),
IGHV = factor(Biobase::exprs(lpdAll)["IGHV Uppsala U/M",patID])) %>%
filter(!is.na(doubling.time), !is.na(value)) %>%
as.tibble()
Correlation test between seahorse measurement and doubling time
corTest <- function(x, y, block = NULL, method = "pearson") {
if (is.null(block)) {
res <- cor.test(x,y, method = method)
tibble(p = res$p.value, coef = res$estimate[[1]])
} else {
tab <- data.frame(x = x, y = y, block = block)
res <- summary(lm( y ~ x + block, tab)) #how much y can be explained by x and block
tibble(p = res$coefficients[2,4],
coef = cor(x,y))
}
}
corRes <- group_by(testTab, Measurement) %>% do(corTest(.$value, .$doubling.time)) %>%
ungroup() %>% mutate(p.adj = p.adjust(p, method = "BH"))
Correlations between lymphocyte doubling time and IGHV stauts
pRes <- filter(testTab, !duplicated(patID)) %>%
do(data.frame(p = t.test(doubling.time ~ IGHV,.)$p.value))
pRes
## p
## 1 2.635161e-07
Correlation test between seahorse measurement and doubling time (Considering IGHV as cofactor)
corRes.aov <- group_by(testTab, Measurement) %>% filter(!is.na(IGHV)) %>%
do(corTest(x = .$value, y = .$doubling.time, block = .$IGHV)) %>%
ungroup() %>% mutate(p.adj = p.adjust(p, method = "BH"))
Correlation test between seahorse measurement and doubling time (within M-CLL)
corRes.M <- group_by(testTab, Measurement) %>% filter(IGHV %in% 1) %>%
do(corTest(x = .$value, y = .$doubling.time)) %>%
ungroup() %>% mutate(p.adj = p.adjust(p, method = "BH"))
Correlation test between seahorse measurement and doubling time (within U-CLL)
corRes.U <- group_by(testTab, Measurement) %>% filter(IGHV %in% 0) %>%
do(corTest(x = .$value, y = .$doubling.time)) %>%
ungroup() %>% mutate(p.adj = p.adjust(p, method = "BH"))
A table for output
outTable <- tibble(`Seahorse mearuement` = formatSea(corRes$Measurement),
`p value` = format(corRes$p, digits = 2),
`adjusted p` = format(corRes$p.adj, digits = 3),
`p value (IGHV blocked)` = format(corRes.aov$p, digits = 3),
`adjusted p (IGHV blocked)` = format(corRes.aov$p.adj, digits = 3))
write(
print(xtable(outTable, digits = 3,
caption = "Correlation tests between each Seahorse measurements and lymphocyte doubling time"),
include.rownames=FALSE,
caption.placement = "top")
,"section05/tTest_SeahorseVSldt.tex")
seaList <- c("glycolysis", "glycolytic.capacity")
plotList.cor <- lapply(seaList, function(seaName) {
#prepare table for plotting
plotTab <- filter(testTab, Measurement == seaName) %>%
mutate(Measurement = formatSea(Measurement),
IGHV = ifelse(is.na(IGHV), "unknown",
ifelse(IGHV == 1, "mutated", "unmutated"))) %>%
mutate(IGHV = factor(IGHV, levels = c("mutated","unmutated","unknown"))) %>%
filter(!is.na(value), !is.na(doubling.time))
#p value for annotation
pval <- filter(corRes, Measurement == seaName)$p
coef <- filter(corRes, Measurement == seaName)$coef
#color for IGHV status
colorList <- c(mutated = "red", unmutated = "black", unknown = "grey80")
#formatted seaname
seaTitle <- unique(plotTab$Measurement)
#prepare correlation test annotations
annoText <- paste("'coefficient ='~",format(coef,digits = 2),"*","', p ='~",sciPretty(pval,digits=2))
ggplot(plotTab, aes(x=value,y=doubling.time)) +
geom_point(size=1, aes(col = IGHV)) +
geom_smooth(method = "lm", se= FALSE) +
xlab("ECAR (pMol/min)") + ylab("Lymphocyte doubling time (days)") +
theme_bw() + ggtitle(seaTitle) +
scale_color_manual(values = colorList) +
annotate("text", x = -Inf, y = Inf, label = annoText,
vjust=1, hjust=0, size = 5, parse = TRUE, col= "darkred") +
theme(panel.grid = element_blank(),
axis.title = element_text(size=13, face="bold"),
axis.text = element_text(size=12),
plot.title = element_text(face="bold", hjust=0.5, size = 15 ),
legend.position = "none",
axis.title.x = element_text(face="bold"),
plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))
})
Show the plot
grid.arrange(grobs = plotList.cor, ncol =2)
Combine and save the plot
plot_grid(plotList.cor[[1]], plotList.cor[[2]],
plotList[[1]], plotList[[2]],labels = "AUTO")
seaTable <- assays(seaCombat)$seaMedian
survT = patmeta[colnames(seaTable),]
survT[which(survT[,"IGHV"]=="U") ,"IGHV"] = 0
survT[which(survT[,"IGHV"]=="M") ,"IGHV"] = 1
survT$IGHV = as.numeric(survT$IGHV)
colnames(survT) = gsub("Age4Main", "age", colnames(survT))
#add seahorse measurement information
survT <- cbind(survT, t(seaTable))
# competinting risk endpoint fpr
survT$compE <- ifelse(survT$treatedAfter == TRUE, 1, 0)
survT$compE <- ifelse(survT$treatedAfter == FALSE & survT$died==TRUE,
2, survT$compE )
survT$T7 <- ifelse(survT$compE == 1, survT$T5, survT$T6 )
Function to calculate correlations
com <- function( Time, endpoint, scaleX, sub, d, split, drug_names) {
res <- lapply(d, function(g) {
#drug <- survT[,g] * scaleX
drug <- survT[,g]
#drug <- (drug - mean(drug, na.rm = TRUE))/sd(drug, na.rm=TRUE)
## all=99, M-CLL=1, U-CLL=0
if(sub==99) { surv <- coxph(Surv(survT[,paste0(Time)],
survT[,paste0(endpoint)] == TRUE) ~ drug)}
if(sub<99) { surv <- coxph(Surv(survT[,paste0(Time)],
survT[,paste0(endpoint)] == TRUE) ~ drug,
subset=survT[,paste0(split)]==sub)}
c(summary(surv)[[7]][,5], summary(surv)[[7]][,2],
summary(surv)[[8]][,3],
summary(surv)[[8]][,4])
})
s <- do.call(rbind, res)
colnames(s) <- c("p", "HR", "lower", "higher")
rownames(s) <- drug_names
s
}
All samples
d <- rownames(seaTable)
sea_names <- formatSea(d)
ttt <- com(Time="T5", endpoint="treatedAfter", sub=99, d=d,
split="IGHV", drug_names=sea_names, scaleX=1)
os <- com(Time="T6", endpoint="died", sub=99, d=d, split="",
drug_names=sea_names, scaleX=1)
#TTT result
ttt
## p HR lower higher
## basal respiration 0.78487361 1.0045508 0.97232990 1.037839
## ATP production 0.61856940 1.0089384 0.97420635 1.044909
## proton leak 0.79523162 0.9941888 0.95137805 1.038926
## maximal respiration 0.04329390 1.0083468 1.00025052 1.016509
## spare respiratory capacity 0.02330588 1.0109372 1.00148009 1.020484
## glycolysis 0.43103151 1.0277871 0.96000913 1.100350
## glycolytic capacity 0.05305026 1.0258682 0.99966404 1.052759
## glycolytic reserve 0.03378313 1.0358886 1.00270606 1.070169
## ECAR/OCR 0.17679455 0.2350961 0.02876541 1.921411
## OCR 0.10457119 1.0250543 0.99487711 1.056147
## ECAR 0.57398172 0.9790316 0.90930921 1.054100
#OS result
os
## p HR lower higher
## basal respiration 0.445743838 1.0209332 0.96799051 1.076772
## ATP production 0.054477450 1.0627155 0.99883212 1.130685
## proton leak 0.230798917 0.9585454 0.89441307 1.027276
## maximal respiration 0.241401237 1.0082235 0.99450296 1.022133
## spare respiratory capacity 0.260001165 1.0093938 0.99310455 1.025950
## glycolysis 0.153194519 1.0908935 0.96813843 1.229213
## glycolytic capacity 0.005552633 1.0728828 1.02084207 1.127576
## glycolytic reserve 0.003918966 1.0943347 1.02931759 1.163459
## ECAR/OCR 0.867915535 1.3239198 0.04849462 36.143461
## OCR 0.727618843 1.0086884 0.96076018 1.059008
## ECAR 0.623678252 1.0313524 0.91169617 1.166713
M-CLL samples
d <- rownames(seaTable)
sea_names <- formatSea(d)
ttt.M <- com(Time="T5", endpoint="treatedAfter", sub=1, d=d,
split="IGHV", drug_names=sea_names, scaleX=1)
os.M <- com(Time="T6", endpoint="died", sub=1, d=d,
split="IGHV",drug_names=sea_names, scaleX=1)
#TTT result
ttt.M
## p HR lower higher
## basal respiration 0.66972710 1.01302726 0.9545326975 1.075106
## ATP production 0.26823880 1.03545664 0.9735172775 1.101337
## proton leak 0.39840912 0.96862771 0.8995549818 1.043004
## maximal respiration 0.53665512 1.00492687 0.9893814783 1.020717
## spare respiratory capacity 0.57126071 1.00514690 0.9874422205 1.023169
## glycolysis 0.19675546 0.91814324 0.8064834965 1.045263
## glycolytic capacity 0.83294236 0.99483645 0.9481134323 1.043862
## glycolytic reserve 0.65507343 1.01398165 0.9540555009 1.077672
## ECAR/OCR 0.08563539 0.02940216 0.0005271614 1.639891
## OCR 0.28309251 1.03189088 0.9744044931 1.092769
## ECAR 0.23953728 0.92290950 0.8074193715 1.054919
#OS result
os.M
## p HR lower higher
## basal respiration 0.30412487 0.9499771 0.86140242 1.0476596
## ATP production 0.27513686 1.0632632 0.95234566 1.1870990
## proton leak 0.01414948 0.8720443 0.78169459 0.9728369
## maximal respiration 0.34145002 0.9847134 0.95395111 1.0164676
## spare respiratory capacity 0.46372706 0.9870185 0.95311644 1.0221264
## glycolysis 0.95777589 1.0058566 0.81031426 1.2485865
## glycolytic capacity 0.13875994 1.0781006 0.97593525 1.1909610
## glycolytic reserve 0.06252895 1.1162450 0.99426344 1.2531918
## ECAR/OCR 0.46124679 7.5627436 0.03477659 1644.6435373
## OCR 0.29729407 0.9364032 0.82755839 1.0595638
## ECAR 0.97528422 1.0035431 0.80235073 1.2551851
U-CLL samples
d <- rownames(seaTable)
sea_names <- formatSea(d)
ttt.U <- com(Time="T5", endpoint="treatedAfter", sub=0, d=d,
split="IGHV", drug_names=sea_names, scaleX=1)
os.U <- com(Time="T6", endpoint="died", sub=0, d=d,
split="IGHV",drug_names=sea_names, scaleX=1)
#TTT result
ttt.U
## p HR lower higher
## basal respiration 0.57970659 0.9884185 0.94849803 1.030019
## ATP production 0.45865707 0.9827452 0.93853098 1.029042
## proton leak 0.86897345 1.0054747 0.94232066 1.072861
## maximal respiration 0.27177986 1.0054096 0.99578218 1.015130
## spare respiratory capacity 0.14010915 1.0085995 0.99719158 1.020138
## glycolysis 0.38446733 1.0468223 0.94424958 1.160537
## glycolytic capacity 0.06412587 1.0360918 0.99792112 1.075723
## glycolytic reserve 0.04355106 1.0517270 1.00146086 1.104516
## ECAR/OCR 0.63296402 0.4761677 0.02265924 10.006323
## OCR 0.51066854 1.0123061 0.97607840 1.049878
## ECAR 0.69072636 0.9805743 0.89025029 1.080062
#OS result
os.U
## p HR lower higher
## basal respiration 0.35128211 1.0296727 0.968269501 1.094970
## ATP production 0.40353629 1.0318067 0.958719943 1.110465
## proton leak 0.73041348 1.0175484 0.921682426 1.123386
## maximal respiration 0.25523712 1.0084698 0.993923097 1.023229
## spare respiratory capacity 0.28477364 1.0095579 0.992113741 1.027309
## glycolysis 0.30623093 1.0918435 0.922700156 1.291993
## glycolytic capacity 0.06146802 1.0659453 0.996936433 1.139731
## glycolytic reserve 0.05659909 1.0861820 0.997679695 1.182535
## ECAR/OCR 0.95731501 0.8838535 0.009613555 81.259948
## OCR 0.82813436 1.0059134 0.953769322 1.060908
## ECAR 0.74229021 1.0253082 0.883397069 1.190016
Function for km plot
#need to be fixed
km <- function(survT, seaName, split, titlePlot, t, hr, c, pvals = NULL) {
#function for km plot
survS <- survT
#filter_(survT, sprintf("!is.na(%s)",split),
# sprintf("!is.na(%s)",seaName))
k <- survS[ , seaName]
ms5 <- maxstat.test(Surv(T5, treatedAfter) ~ k,
data = survS,
smethod = "LogRank",
minprop = 0.2,
maxprop = 0.8,
alpha = NULL)
ms6 <- maxstat.test(Surv(T6, died) ~ k,
data = survS,
smethod = "LogRank",
minprop = 0.2,
maxprop = 0.8,
alpha = NULL)
med <- median(k, na.rm = TRUE)
# median
if (c=="med") {
survS$cutA <- ifelse(k >= med, "high", "low")
survS$cutM <- ifelse(k >= med, "high", "low")
survS$cutU <- ifelse(k >= med, "high", "low")
}
#maxstat & TTT
if (c=="maxstat" & t=="TTT") {
survS$cutA <- ifelse(k >= ms5$estimate, "high", "low")
survS$cutM <- ifelse(k >= ms5$estimate, "high", "low")
survS$cutU <- ifelse(k >= ms5$estimate, "high", "low")
}
#OS & maxstat
if (c=="maxstat" & t=="OS") {
survS$cutA <- ifelse(k >= ms6$estimate, "high", "low")
survS$cutM <- ifelse(k >= ms6$estimate, "high", "low")
survS$cutU <- ifelse(k >= ms6$estimate, "high", "low")
}
p <- list() #list for storing plots
#prepare p value annotations
if (is.null(pvals)){
pvalList <- rep(FALSE,3)
} else {
pvalList <- pvals
}
#subset according to genetic factor
survM <- survS[survS[,split] %in% 1,]
survU <- survS[survS[,split] %in% 0,]
inputName <- formatSea(seaName)
if (t=="TTT") {
yl <- "Fraction w/o treatment"
#if (c=="med"){ cat(sprintf("%s median-cutpoint for TTT: %5.2f\n", inputName, median(k) ) ) } else
# { cat(sprintf("%s cutpoint for TTT: %5.2g\n", inputName, ms5$estimate )) }
p[[1]] <- ggsurvplot(survfit(Surv(T5, treatedAfter) ~ cutA, data = survS),
data = survS, pval = pvalList[1],
ylab = yl, xlab = "Time (years)", title = inputName,
legend.labs = c("high","low"), palette = c("red","blue"),
ggtheme = theme(plot.title = element_text(vjust =0.5, size = 18),
axis.text = element_text(size= 15)))$plot
p[[2]] <- ggsurvplot(survfit(Surv(T5, treatedAfter) ~ cutM, data = survM),
data = survS, pval = pvalList[2],
ylab = yl, xlab = "Time (years)",
title = paste(inputName, titlePlot[1], titlePlot[3]),
legend.labs = c("high","low"), palette = c("red","blue"),
ggtheme = theme(plot.title = element_text(vjust =0.5, size = 18),
axis.text = element_text(size= 15)))$plot
p[[3]] <- ggsurvplot(survfit(Surv(T5, treatedAfter) ~ cutU, data = survU),
data = survS, pval = pvalList[3],
ylab = yl, xlab = "Time (years)",
title = paste(inputName, titlePlot[1], titlePlot[2]),
legend.labs = c("high","low"), palette = c("red","blue"),
ggtheme = theme(plot.title = element_text(vjust =0.5, size = 18),
axis.text = element_text(size= 15)))$plot
}
# OS
else {
yl <- "Fraction overall survival"
#if (c=="med"){ cat(sprintf("%s median-cutpoint for OS: %5.2f\n", inputName, median(k) ) ) } else
# { cat(sprintf("%s cutpoint for OS: %5.2f\n", inputName, ms6$estimate )) }
p[[1]] <- ggsurvplot(survfit(Surv(T5, died) ~ cutA, data = survS), data = survS,
ylab = yl, xlab = "Time (years)", pval = pvalList[1],
title = inputName, legend.labs = c("high","low"), palette = c("red","blue"),
ggtheme = theme(plot.title = element_text(vjust =0.5, size = 18),
axis.text = element_text(size= 15)))$plot
p[[2]] <- ggsurvplot(survfit(Surv(T5, died) ~ cutM, data = survM), data = survM,
ylab = yl, xlab = "Time (years)", pval = pvalList[2],
title = paste(inputName, titlePlot[1], titlePlot[3]),
legend.labs = c("high","low"), palette = c("red","blue"),
ggtheme = theme(plot.title = element_text(vjust =0.5, size = 18),
axis.text = element_text(size= 15)))$plot
p[[3]] <- ggsurvplot(survfit(Surv(T5, died) ~ cutU, data = survU), data = survU,
ylab = yl, xlab = "Time (years)", pval = pvalList[3],
title = paste(inputName, titlePlot[1], titlePlot[2]),
legend.labs = c("high","low"), palette = c("red","blue"),
ggtheme = theme(plot.title = element_text(vjust =0.5, size = 18),
axis.text = element_text(size= 15)))$plot
}
p
}
Time to next treatment (maxstat).
TTTplot.maxstat <- list()
for (seaName in rownames(seaCombat)) {
seaName.format <- formatSea(seaName)
pvals <- c(ttt[seaName.format ,"p"], ttt.M[seaName.format ,"p"], ttt.U[seaName.format ,"p"])
pvals <- sapply(pvals, function(x) {
sprintf("p value = %1.2f",x)
})
TTTplot.maxstat[[seaName]] <- km(survT, seaName = seaName, split = "IGHV", t="TTT",
titlePlot =c("(IGHV", "umutated)", "mutated)"), hr="tr", c="maxstat", pvals = pvals)
}
Overall survival (maxstat).
OSplot.maxstat <- list()
for (seaName in rownames(seaCombat)) {
seaName.format <- formatSea(seaName)
pvals <- c(os[seaName.format ,"p"], os.M[seaName.format ,"p"], os.U[seaName.format ,"p"])
pvals <- sapply(pvals, function(x) {
sprintf("p value = %1.2f",x)
})
OSplot.maxstat[[seaName]] <- km(survT, seaName = seaName, split = "IGHV", t="OS",
titlePlot =c("(IGHV", "umutated)", "mutated)"), hr="tr", c="maxstat", pvals = pvals)
}
KM plot for all samples (not shown in manuscript)
plot_grid(TTTplot.maxstat$glycolytic.reserve[[1]], TTTplot.maxstat$maximal.respiration[[1]],
TTTplot.maxstat$spare.respiratory.capacity[[1]],
OSplot.maxstat$glycolytic.capacity[[1]], OSplot.maxstat$glycolytic.reserve[[1]],
labels = "AUTO")
KM plot for IGHV stratified samples (not shown in manuscript)
plot_grid(TTTplot.maxstat$glycolytic.reserve[[2]], TTTplot.maxstat$glycolytic.reserve[[3]],
TTTplot.maxstat$maximal.respiration[[2]], TTTplot.maxstat$maximal.respiration[[3]],
TTTplot.maxstat$spare.respiratory.capacity[[2]], TTTplot.maxstat$spare.respiratory.capacity[[3]],
OSplot.maxstat$glycolytic.capacity[[2]], OSplot.maxstat$glycolytic.capacity[[3]],
OSplot.maxstat$glycolytic.reserve[[2]], OSplot.maxstat$glycolytic.reserve[[3]],
labels = "AUTO", ncol = 2)
Time to next treatment (median).
TTTplot.med <- list()
for (seaName in rownames(seaCombat)) {
seaName.format <- formatSea(seaName)
pvals <- c(ttt[seaName.format ,"p"], ttt.M[seaName.format ,"p"], ttt.U[seaName.format ,"p"])
pvals <- sapply(pvals, function(x) {
sprintf("p value = %1.2f",x)
})
TTTplot.med[[seaName]] <- km(survT, seaName = seaName, split = "IGHV", t="TTT",
titlePlot =c("(IGHV", "umutated)", "mutated)"), hr="tr", c="med", pvals = pvals)
}
Overall survival (median).
OSplot.med <- list()
for (seaName in rownames(seaCombat)) {
seaName.format <- formatSea(seaName)
pvals <- c(os[seaName.format ,"p"], os.M[seaName.format ,"p"], os.U[seaName.format ,"p"])
pvals <- sapply(pvals, function(x) {
sprintf("p value = %1.2f",x)
})
OSplot.med[[seaName]] <- km(survT, seaName = seaName, split = "IGHV", t="OS",
titlePlot =c("(IGHV", "umutated)", "mutated)"), hr="tr", c="med", pvals = pvals)
}
KM plot for all samples (Figure 6)
title = ggdraw() + draw_figure_label("Figure 6", fontface = "bold", position = "top.left",size=20)
p<-plot_grid(TTTplot.med$glycolytic.reserve[[1]], TTTplot.med$maximal.respiration[[1]],
TTTplot.med$spare.respiratory.capacity[[1]],
OSplot.med$glycolytic.capacity[[1]], OSplot.med$glycolytic.reserve[[1]],
labels = NULL)
plot_grid(title, p, rel_heights = c(0.05,0.95), ncol = 1)
KM plot for IGHV stratified samples (not shown in manuscript)
plot_grid(TTTplot.med$glycolytic.reserve[[2]], TTTplot.med$glycolytic.reserve[[3]],
TTTplot.med$maximal.respiration[[2]], TTTplot.med$maximal.respiration[[3]],
TTTplot.med$spare.respiratory.capacity[[2]], TTTplot.med$spare.respiratory.capacity[[3]],
OSplot.med$glycolytic.capacity[[2]], OSplot.med$glycolytic.capacity[[3]],
OSplot.med$glycolytic.reserve[[2]], OSplot.med$glycolytic.reserve[[3]],
labels = "AUTO", ncol = 2)
seaTable <- assays(seaCombat)$seaMedian
survT = patmeta[colnames(seaTable),]
survT[which(survT[,"IGHV"]=="U") ,"IGHV"] = 0
survT[which(survT[,"IGHV"]=="M") ,"IGHV"] = 1
survT$IGHV = as.numeric(survT$IGHV)
colnames(survT) = gsub("Age4Main", "age", colnames(survT))
#add seahorse measurement information
survT <- cbind(survT, t(mscale(seaTable)))
# competinting risk endpoint fpr
survT$compE <- ifelse(survT$treatedAfter == TRUE, 1, 0)
survT$compE <- ifelse(survT$treatedAfter == FALSE & survT$died==TRUE,
2, survT$compE )
survT$T7 <- ifelse(survT$compE == 1, survT$T5, survT$T6 )
#genetic variants
survT$SF3B1 <- Biobase::exprs(lpdAll)[ "SF3B1", rownames(survT) ]
survT$NOTCH1 <- Biobase::exprs(lpdAll)[ "NOTCH1", rownames(survT) ]
survT$BRAF <- Biobase::exprs(lpdAll)[ "BRAF", rownames(survT) ]
survT$TP53 <- Biobase::exprs(lpdAll)[ "TP53", rownames(survT) ]
survT$del17p13 <- Biobase::exprs(lpdAll)[ "del17p13", rownames(survT) ]
survT$del11q22.3 <- Biobase::exprs(lpdAll)[ "del11q22.3", rownames(survT) ]
survT$trisomy12 <- Biobase::exprs(lpdAll)[ "trisomy12", rownames(survT) ]
extractSome <- function(x) {
sumsu <- summary(x)
data.frame(
`p-value` =
sprintf("%6.3g", sumsu[["coefficients"]][, "Pr(>|z|)"]),
`HR` =
sprintf("%6.3g", signif( sumsu[["coefficients"]][, "exp(coef)"], 2) ),
`lower 95% CI` =
sprintf("%6.3g", signif( sumsu[["conf.int"]][, "lower .95"], 2) ),
`upper 95% CI` =
sprintf("%6.3g", signif( sumsu[["conf.int"]][, "upper .95"], 2),
check.names = FALSE) )
}
Define covariates and effects.
glycolytic reserve
surv1 <- coxph(
Surv(T5, treatedAfter) ~
age +
as.factor(IC50beforeTreatment) +
as.factor(trisomy12) +
as.factor(del11q22.3) +
as.factor(del17p13) +
as.factor(TP53) +
IGHVwt +
glycolytic.reserve, # continuous
data = survT )
colFactor <- data.frame(factor = c("age", "pretreatment",
"trisomy12", "del11q22.3",
"del17p13","TP53","U-CLL",
"glycolytic reserve"))
outTab <- cbind(colFactor,extractSome(surv1))
write(print(xtable(outTab,
caption = "Multivariate Cox regression model for time to treatment with glycolytic reserve as a covariate"),
include.rownames=FALSE,
caption.placement = "top"), file = "section05/glyRes_TTT.tex")
## % latex table generated in R 3.5.0 by xtable 1.8-2 package
## % Thu May 24 17:14:50 2018
## \begin{table}[ht]
## \centering
## \caption{Multivariate Cox regression model for time to treatment with glycolytic reserve as a covariate}
## \begin{tabular}{lllll}
## \hline
## factor & p.value & HR & lower.95..CI & upper.95..CI \\
## \hline
## age & 0.0744 & 0.81 & 0.64 & 1 \\
## pretreatment & 2.3e-05 & 0.21 & 0.1 & 0.43 \\
## trisomy12 & 0.32 & 1.6 & 0.64 & 3.8 \\
## del11q22.3 & 0.679 & 0.83 & 0.36 & 2 \\
## del17p13 & 0.658 & 1.2 & 0.52 & 2.9 \\
## TP53 & 0.123 & 1.9 & 0.84 & 4.1 \\
## U-CLL & 0.0863 & 1.8 & 0.92 & 3.7 \\
## glycolytic reserve & 0.413 & 1.2 & 0.76 & 1.9 \\
## \hline
## \end{tabular}
## \end{table}
cat(sprintf("%s patients considerd in the model; number of events %1g\n",
summary(surv1)$n, summary(surv1)[6] ) )
## 89 patients considerd in the model; number of events 47
maximal respiration
surv1 <- coxph(
Surv(T5, treatedAfter) ~
age +
as.factor(IC50beforeTreatment) +
as.factor(trisomy12) +
as.factor(del11q22.3) +
as.factor(del17p13) +
as.factor(TP53) +
IGHVwt +
maximal.respiration, # continuous
data = survT )
colFactor <- data.frame(factor = c("age", "pretreatment",
"trisomy12", "del11q22.3",
"del17p13","TP53","U-CLL",
"maximal respiration"))
outTab <- cbind(colFactor,extractSome(surv1))
write(print(xtable(outTab,
caption = "Multivariate Cox regression model for time to treatment with maximal respiration as a covariate"),
include.rownames=FALSE,
caption.placement = "top"), file = "section05/maxRes_TTT.tex")
## % latex table generated in R 3.5.0 by xtable 1.8-2 package
## % Thu May 24 17:14:50 2018
## \begin{table}[ht]
## \centering
## \caption{Multivariate Cox regression model for time to treatment with maximal respiration as a covariate}
## \begin{tabular}{lllll}
## \hline
## factor & p.value & HR & lower.95..CI & upper.95..CI \\
## \hline
## age & 0.0183 & 0.78 & 0.63 & 0.96 \\
## pretreatment & 4.13e-05 & 0.25 & 0.13 & 0.48 \\
## trisomy12 & 0.115 & 2 & 0.85 & 4.5 \\
## del11q22.3 & 0.604 & 1.2 & 0.56 & 2.7 \\
## del17p13 & 0.918 & 0.96 & 0.4 & 2.3 \\
## TP53 & 0.101 & 2 & 0.87 & 4.5 \\
## U-CLL & 0.0276 & 2.1 & 1.1 & 3.9 \\
## maximal respiration & 0.0562 & 1.4 & 0.99 & 2 \\
## \hline
## \end{tabular}
## \end{table}
cat(sprintf("%s patients considerd in the model; number of events %1g\n",
summary(surv1)$n, summary(surv1)[6] ) )
## 102 patients considerd in the model; number of events 55
spare respiratory capacity
surv1 <- coxph(
Surv(T5, treatedAfter) ~
age +
as.factor(IC50beforeTreatment) +
as.factor(trisomy12) +
as.factor(del11q22.3) +
as.factor(del17p13) +
as.factor(TP53) +
IGHVwt +
spare.respiratory.capacity, # continuous
data = survT )
colFactor <- data.frame(factor = c("age", "pretreatment",
"trisomy12", "del11q22.3",
"del17p13","TP53","U-CLL",
"spare.respiratory.capacity"))
outTab <- cbind(colFactor,extractSome(surv1))
write(print(xtable(outTab,
caption = "Multivariate Cox regression model for time to treatment with glycolytic reserve as a covariate"),
include.rownames=FALSE,
caption.placement = "top"), file = "section05/spResCap_TTT.tex")
## % latex table generated in R 3.5.0 by xtable 1.8-2 package
## % Thu May 24 17:14:50 2018
## \begin{table}[ht]
## \centering
## \caption{Multivariate Cox regression model for time to treatment with glycolytic reserve as a covariate}
## \begin{tabular}{lllll}
## \hline
## factor & p.value & HR & lower.95..CI & upper.95..CI \\
## \hline
## age & 0.0183 & 0.78 & 0.63 & 0.96 \\
## pretreatment & 4.06e-05 & 0.25 & 0.13 & 0.48 \\
## trisomy12 & 0.109 & 2 & 0.86 & 4.5 \\
## del11q22.3 & 0.631 & 1.2 & 0.55 & 2.7 \\
## del17p13 & 0.911 & 0.95 & 0.4 & 2.3 \\
## TP53 & 0.114 & 1.9 & 0.85 & 4.4 \\
## U-CLL & 0.0256 & 2.1 & 1.1 & 4 \\
## spare.respiratory.capacity & 0.0505 & 1.4 & 1 & 2 \\
## \hline
## \end{tabular}
## \end{table}
cat(sprintf("%s patients considerd in the model; number of events %1g\n",
summary(surv1)$n, summary(surv1)[6] ) )
## 102 patients considerd in the model; number of events 55
glycolytic reserve
surv1 <- coxph(
Surv(T6, died) ~
age +
as.factor(IC50beforeTreatment) +
as.factor(trisomy12) +
as.factor(del11q22.3) +
as.factor(del17p13) +
as.factor(TP53) +
IGHVwt +
glycolytic.reserve, # continuous
data = survT )
colFactor <- data.frame(factor = c("age", "pretreatment",
"trisomy12", "del11q22.3",
"del17p13","TP53","U-CLL",
"glycolytic reserve"))
outTab <- cbind(colFactor,extractSome(surv1))
write(print(xtable(outTab,
caption = "Multivariate Cox regression model for overall survival with glycolytic reserve as a covariate"),
include.rownames=FALSE,
caption.placement = "top"), file = "section05/glyRes_OS.tex")
## % latex table generated in R 3.5.0 by xtable 1.8-2 package
## % Thu May 24 17:14:50 2018
## \begin{table}[ht]
## \centering
## \caption{Multivariate Cox regression model for overall survival with glycolytic reserve as a covariate}
## \begin{tabular}{lllll}
## \hline
## factor & p.value & HR & lower.95..CI & upper.95..CI \\
## \hline
## age & 0.69 & 1.1 & 0.74 & 1.6 \\
## pretreatment & 0.0013 & 0.087 & 0.02 & 0.39 \\
## trisomy12 & 0.0663 & 4.6 & 0.9 & 24 \\
## del11q22.3 & 0.322 & 0.49 & 0.12 & 2 \\
## del17p13 & 0.761 & 0.79 & 0.17 & 3.7 \\
## TP53 & 0.685 & 1.3 & 0.36 & 4.8 \\
## U-CLL & 0.0817 & 3.1 & 0.87 & 11 \\
## glycolytic reserve & 0.28 & 1.5 & 0.72 & 3.1 \\
## \hline
## \end{tabular}
## \end{table}
cat(sprintf("%s patients considerd in the model; number of events %1g\n",
summary(surv1)$n, summary(surv1)[6] ) )
## 95 patients considerd in the model; number of events 16
glycolytic capacity
surv1 <- coxph(
Surv(T6, died) ~
age +
as.factor(IC50beforeTreatment) +
as.factor(trisomy12) +
as.factor(del11q22.3) +
as.factor(del17p13) +
as.factor(TP53) +
IGHVwt +
glycolytic.capacity, # continuous
data = survT )
colFactor <- data.frame(factor = c("age", "pretreatment",
"trisomy12", "del11q22.3",
"del17p13","TP53","U-CLL",
"glycolytic capacity"))
outTab <- cbind(colFactor,extractSome(surv1))
write(print(xtable(outTab,
caption = "Multivariate Cox regression model for overall survival with glycolytic capacity as a covariate"),
include.rownames=FALSE,
caption.placement = "top"), file = "section05/glyCap_OS.tex")
## % latex table generated in R 3.5.0 by xtable 1.8-2 package
## % Thu May 24 17:14:51 2018
## \begin{table}[ht]
## \centering
## \caption{Multivariate Cox regression model for overall survival with glycolytic capacity as a covariate}
## \begin{tabular}{lllll}
## \hline
## factor & p.value & HR & lower.95..CI & upper.95..CI \\
## \hline
## age & 0.738 & 1.1 & 0.73 & 1.6 \\
## pretreatment & 0.00114 & 0.082 & 0.018 & 0.37 \\
## trisomy12 & 0.0687 & 4.5 & 0.89 & 23 \\
## del11q22.3 & 0.251 & 0.44 & 0.11 & 1.8 \\
## del17p13 & 0.676 & 0.72 & 0.15 & 3.4 \\
## TP53 & 0.679 & 1.3 & 0.35 & 4.9 \\
## U-CLL & 0.0871 & 3.1 & 0.85 & 11 \\
## glycolytic capacity & 0.383 & 1.4 & 0.64 & 3.2 \\
## \hline
## \end{tabular}
## \end{table}
cat(sprintf("%s patients considerd in the model; number of events %1g\n",
summary(surv1)$n, summary(surv1)[6] ) )
## 95 patients considerd in the model; number of events 16
Loading the data.
data(list=c("conctab", "drpar", "drugs", "patmeta", "lpdAll", "dds", "mutCOM",
"methData","seaCombat","pretreat"))
For gene expression and methylation data
#only consider CLL patients
CLLPatients<-rownames(patmeta)[which(patmeta$Diagnosis=="CLL")]
e<-dds
colnames(e)<-colData(e)$PatID
#Methylation Data
methData = t(assay(methData))
methPCA <- prcomp(methData, center = T, scale. = TRUE)$x[,1:20]
#RNA Data
eCLL<-e[,colnames(e) %in% CLLPatients]
#filter out genes without gene name
eCLL<-eCLL[(!rowData(eCLL)$symbol %in% c("",NA)),]
#filter out low count genes
###
minrs <- 100
rs <- rowSums(assay(eCLL))
eCLL<-eCLL[ rs >= minrs, ]
#variance stabilize the data
vstCounts<-varianceStabilizingTransformation(eCLL)
vstCounts<-assay(vstCounts)
#filter out low variable genes
ntop<-5000
vstCountsFiltered<-vstCounts[order(apply(vstCounts, 1, var, na.rm=T),
decreasing = T)[1:ntop],]
eData<-t(vstCountsFiltered)
#Prepare PCA
pcRes <- prcomp(eData, center = T, scale. = TRUE)
rnaPCA <- pcRes$x[,1:20]
pcLoad <- pcRes$rotation[,1:20]
For genetic data
#genetics
mutCOMbinary<-channel(mutCOM, "binary")
mutCOMbinary<-mutCOMbinary[featureNames(mutCOMbinary) %in% colnames(seaCombat),]
genData<-Biobase::exprs(mutCOMbinary)
idx <- which(colnames(genData) %in% c("del13q14_bi", "del13q14_mono"))
genData <- genData[,-idx]
colnames(genData)[which(colnames(genData)=="del13q14_any")] = "del13q14"
#remove gene with higher than 20% missing values
genData <- genData[,colSums(is.na(genData))/nrow(genData) <= 0.2]
#fill the missing value with majority
genData <- apply(genData, 2, function(x) {
xVec <- x
avgVal <- mean(x,na.rm= TRUE)
if (avgVal >= 0.5) {
xVec[is.na(xVec)] <- 1
} else xVec[is.na(xVec)] <- 0
xVec
})
For IGHV and methylation cluster
#IGHV
translation <- c(`U` = 0, `M` = 1)
stopifnot(all(patmeta$IGHV %in% c("U","M", NA)))
IGHVData <- matrix(translation[patmeta$IGHV],
dimnames = list(rownames(patmeta), "IGHV"), ncol = 1)
IGHVData<-IGHVData[rownames(IGHVData) %in% CLLPatients,,drop=F]
#remove patiente with NA IGHV status
IGHVData<-IGHVData[!is.na(IGHVData), ,drop=F]
# Methylation cluster
translation <- c(`HP` = 2, `IP` = 1, `LP` = 0)
Mcluster <- matrix(translation[patmeta$ConsClust],
dimnames = list(rownames(patmeta), "ConsCluster"), ncol = 1)
Mcluster <- Mcluster[rownames(Mcluster) %in% CLLPatients,,drop=F]
Mcluster <- Mcluster[!is.na(Mcluster), ,drop=F]
For demographic and clinical data
#demographics (age and sex)
patmeta<-subset(patmeta, Diagnosis=="CLL")
gender <- ifelse(patmeta[,"Gender"]=="m",0,1)
# impute missing values in age by mean
ImputeByMean <- function(x) {x[is.na(x)] <- mean(x, na.rm=TRUE); return(x)}
age<-ImputeByMean(patmeta[,"Age4Main"])
demogrData <- cbind(age=age,gender=gender)
rownames(demogrData) <- rownames(patmeta)
#Pretreatment
pretreated<- pretreat
For drug response data
#Remove bad drugs. Bortezomib lost its activity during storage. The data for this drug and NSC 74859 were discarded from further analysis.
badrugs = c("D_008", "D_025")
lpdCLL <- lpdAll[!fData(lpdAll)$id %in% badrugs, pData(lpdAll)$Diagnosis == "CLL"]
# get drug responsee data
get.drugresp <- function(lpd) {
drugresp = t(Biobase::exprs(lpd[fData(lpd)$type == 'viab'])) %>%
tbl_df %>% dplyr::select(-ends_with(":5")) %>%
dplyr::mutate(ID = colnames(lpd)) %>%
tidyr::gather(drugconc, viab, -ID) %>%
dplyr::mutate(drug = drugs[substring(drugconc, 1, 5), "name"],
conc = sub("^D_([0-9]+_)", "", drugconc)) %>%
dplyr::mutate(conc = as.integer(gsub("D_CHK_", "", conc)))
drugresp
}
drugresp <- get.drugresp(lpdCLL)
#Use median polish to summarise drug response of the five concentrations
get.medp <- function(drugresp) {
tab = drugresp %>% group_by(drug, conc) %>%
do(data.frame(v = .$viab, ID = .$ID)) %>% spread(ID, v)
med.p = lapply(unique(tab$drug), function(n) {
tb = filter(tab, drug == n) %>% ungroup() %>%
dplyr::select(-(drug:conc)) %>%
as.matrix %>% `rownames<-`(1:5)
mdp = stats::medpolish(tb, trace.iter = FALSE)
df = as.data.frame(mdp$col) + mdp$overall
colnames(df) <- n
df
}) %>% bind_cols()
medp.viab = tbl_df(med.p) %>% mutate(ID = colnames(tab)[3:ncol(tab)]) %>%
gather(drug, viab, -ID)
medp.viab
}
drugresp.mp <- get.medp(drugresp)
viabData <- dcast(drugresp.mp, ID ~ drug, value.var = "viab" )
rownames(viabData) <- viabData$ID
viabData$ID <- NULL
Prepare seahorse meaurement (response vector)
sea <- t(assays(seaCombat)$seaMedian)
Function to Generate the explanatory dataset for each seahorse measurements
#function to generate response vector and explainatory variable for each seahorse measurement
generateData <- function(inclSet, onlyCombine = FALSE, censor = NULL, robust = FALSE) {
dataScale <- function(x, censor = NULL, robust = FALSE) {
#function to scale different variables
if (length(unique(na.omit(x))) == 2){
#a binary variable, change to -0.5 and 0.5 for 1 and 2
x - 0.5
} else if (length(unique(na.omit(x))) == 3) {
#catagorical varialbe with 3 levels, methylation_cluster, change to -0.5,0,0.5
(x - 1)/2
} else {
if (robust) {
#continuous variable, centered by median and divied by 2*mad
mScore <- (x-median(x,na.rm=TRUE))/(1.4826*mad(x,na.rm=TRUE))
if (!is.null(censor)) {
mScore[mScore > censor] <- censor
mScore[mScore < -censor] <- -censor
}
mScore/2
} else {
mScore <- (x-mean(x,na.rm=TRUE))/(sd(x,na.rm=TRUE))
if (!is.null(censor)) {
mScore[mScore > censor] <- censor
mScore[mScore < -censor] <- -censor
}
mScore/2
}
}
}
allResponse <- list()
allExplain <- list()
for (measure in colnames(inclSet$seahorse)) {
y <- inclSet$seahorse[,measure]
y <- y[!is.na(y)]
#get overlapped samples for each dataset
overSample <- names(y)
for (eachSet in inclSet) {
overSample <- intersect(overSample,rownames(eachSet))
}
y <- dataScale(y[overSample], censor = censor, robust = robust)
#generate explainatory variable table for each seahorse measurement
expTab <- list()
if ("drugs" %in% names(inclSet)) {
viabTab <- inclSet$drugs[overSample,]
vecName <- sprintf("drugs(%s)",ncol(viabTab))
colnames(viabTab) <- paste0("con.",colnames(viabTab))
expTab[[vecName]] <- apply(viabTab,2,dataScale,censor = censor, robust = robust)
}
if ("gen" %in% names(inclSet)) {
geneTab <- inclSet$gen[overSample,]
#at least 3 mutated sample
geneTab <- geneTab[, colSums(geneTab) >= 3]
vecName <- sprintf("genetic(%s)", ncol(geneTab))
expTab[[vecName]] <- apply(geneTab,2,dataScale)
}
if ("RNA" %in% names(inclSet)){
#for PCA
rnaPCA <- inclSet$RNA[overSample, ]
colnames(rnaPCA) <- paste0("con.expression",colnames(rnaPCA), sep = "")
expTab[["expression(20)"]] <- apply(rnaPCA,2,dataScale, censor = censor, robust = robust)
}
if ("meth" %in% names(inclSet)){
methPCA <- inclSet$meth[overSample,]
colnames(methPCA) <- paste("con.methylation",colnames(methPCA),sep = "")
expTab[["methylation(20)"]] <- apply(methPCA[,1:20],2,dataScale, censor = censor, robust = robust)
}
if ("IGHV" %in% names(inclSet)) {
IGHVtab <- inclSet$IGHV[overSample,,drop=FALSE]
expTab[["IGHV(1)"]] <- apply(IGHVtab,2,dataScale)
}
if ("Mcluster" %in% names(inclSet)) {
methTab <- inclSet$Mcluster[overSample,,drop=FALSE]
colnames(methTab) <- "methylation cluster"
expTab[["methCluster(1)"]] <- apply(methTab,2,dataScale)
}
if ("demographics" %in% names(inclSet)){
demoTab <- inclSet$demographics[overSample,]
vecName <- sprintf("demographics(%s)", ncol(demoTab))
expTab[[vecName]] <- apply(demoTab,2,dataScale)
}
if ("pretreated" %in% names(inclSet)){
preTab <- inclSet$pretreated[overSample,,drop=FALSE]
expTab[["pretreated(1)"]] <- apply(preTab,2,dataScale)
}
comboTab <- c()
for (eachSet in names(expTab)){
comboTab <- cbind(comboTab, expTab[[eachSet]])
}
vecName <- sprintf("all(%s)", ncol(comboTab))
expTab[[vecName]] <- comboTab
measureName <- sprintf("%s(%s)",formatSea(measure),length(y))
allResponse[[measureName]] <- y
allExplain[[measureName]] <- expTab
}
if (onlyCombine) {
#only return combined results, for feature selection
allExplain <- lapply(allExplain, function(x) x[length(x)])
}
return(list(allResponse=allResponse, allExplain=allExplain))
}
Clean and integrate multi-omics data
inclSet<-list(RNA=rnaPCA, meth=methPCA, gen=genData, IGHV=IGHVData,
demographics=demogrData, drugs=viabData, pretreated=pretreated, seahorse = sea)
cleanData <- generateData(inclSet, censor = 4)
Function for multi-variate regression
runGlm <- function(X, y, method = "ridge", repeats=20, folds = 3) {
modelList <- list()
lambdaList <- c()
varExplain <- c()
coefMat <- matrix(NA, ncol(X), repeats)
rownames(coefMat) <- colnames(X)
if (method == "lasso"){
alpha = 1
} else if (method == "ridge") {
alpha = 0
}
for (i in seq(repeats)) {
if (ncol(X) > 2) {
res <- cv.glmnet(X,y, type.measure = "mse", family="gaussian",
nfolds = folds, alpha = alpha, standardize = FALSE)
lambdaList <- c(lambdaList, res$lambda.min)
modelList[[i]] <- res
coefModel <- coef(res, s = "lambda.min")[-1] #remove intercept row
coefMat[,i] <- coefModel
#calculate variance explained
y.pred <- predict(res, s = "lambda.min", newx = X)
varExp <- cor(as.vector(y),as.vector(y.pred))^2
varExplain[i] <- ifelse(is.na(varExp), 0, varExp)
} else {
fitlm<-lm(y~., data.frame(X))
varExp <- summary(fitlm)$r.squared
varExplain <- c(varExplain, varExp)
}
}
list(modelList = modelList, lambdaList = lambdaList, varExplain = varExplain, coefMat = coefMat)
}
Perform lasso regression
lassoResults <- list()
for (eachMeasure in names(cleanData$allResponse)) {
dataResult <- list()
for (eachDataset in names(cleanData$allExplain[[eachMeasure]])) {
y <- cleanData$allResponse[[eachMeasure]]
X <- cleanData$allExplain[[eachMeasure]][[eachDataset]]
glmRes <- runGlm(X, y, method = "lasso", repeats = 100, folds = 3)
dataResult[[eachDataset]] <- glmRes
}
lassoResults[[eachMeasure]] <- dataResult
}
Function for plotting variance explained for each measurement
Show the plot
grid.arrange(grobs = varList, ncol = 4)
Prepare clean data for feature selection
inclSet<-list(RNA=rnaPCA, gen=genData, IGHV=IGHVData,
drugs=viabData, seahorse = sea)
cleanData <- generateData(inclSet, onlyCombine = TRUE, censor = 4)
Perform lasso regression
lassoResults <- list()
for (eachMeasure in names(cleanData$allResponse)) {
dataResult <- list()
for (eachDataset in names(cleanData$allExplain[[eachMeasure]])) {
y <- cleanData$allResponse[[eachMeasure]]
X <- cleanData$allExplain[[eachMeasure]][[eachDataset]]
glmRes <- runGlm(X, y, method = "lasso", repeats = 100, folds = 3)
dataResult[[eachDataset]] <- glmRes
}
lassoResults[[eachMeasure]] <- dataResult
}
Function for the heatmap plot
lassoPlot <- function(lassoOut, cleanData, freqCut = 1, coefCut = 0.01, setNumber = "last") {
plotList <- list()
if (setNumber == "last") {
setNumber <- length(lassoOut[[1]])
} else {
setNumber <- setNumber
}
for (seaName in names(lassoOut)) {
#for the barplot on the left of the heatmap
barValue <- rowMeans(lassoOut[[seaName]][[setNumber]]$coefMat)
freqValue <- rowMeans(abs(sign(lassoOut[[seaName]][[setNumber]]$coefMat)))
barValue <- barValue[abs(barValue) >= coefCut & freqValue >= freqCut] # a certain threshold
barValue <- barValue[order(barValue)]
if(length(barValue) == 0) {
plotList[[seaName]] <- NA
next
}
#for the heatmap and scatter plot below the heatmap
allData <- cleanData$allExplain[[seaName]][[setNumber]]
seaValue <- cleanData$allResponse[[seaName]]*2 #back to Z-score
tabValue <- allData[, names(barValue),drop=FALSE]
ord <- order(seaValue)
seaValue <- seaValue[ord]
tabValue <- tabValue[ord, ,drop=FALSE]
sampleIDs <- rownames(tabValue)
tabValue <- as.tibble(tabValue)
#change scaled binary back to catagorical
for (eachCol in colnames(tabValue)) {
if (strsplit(eachCol, split = "[.]")[[1]][1] != "con") {
tabValue[[eachCol]] <- as.integer(as.factor(tabValue[[eachCol]]))
}
else {
tabValue[[eachCol]] <- tabValue[[eachCol]]*2 #back to Z-score
}
}
tabValue$Sample <- sampleIDs
#Mark different rows for different scaling in heatmap
matValue <- gather(tabValue, key = "Var",value = "Value", -Sample)
matValue$Type <- "mut"
#For continuious value
matValue$Type[grep("con.",matValue$Var)] <- "con"
#for methylation_cluster
matValue$Type[grep("ConsCluster",matValue$Var)] <- "meth"
#change the scale of the value, let them do not overlap with each other
matValue[matValue$Type == "mut",]$Value = matValue[matValue$Type == "mut",]$Value + 10
matValue[matValue$Type == "meth",]$Value = matValue[matValue$Type == "meth",]$Value + 20
#color scale for viability
idx <- matValue$Type == "con"
myCol <- colorRampPalette(c('dark red','white','dark blue'),
space = "Lab")
if (sum(idx) != 0) {
matValue[idx,]$Value = round(matValue[idx,]$Value,digits = 2)
minViab <- min(matValue[idx,]$Value)
maxViab <- max(matValue[idx,]$Value)
limViab <- max(c(abs(minViab), abs(maxViab)))
scaleSeq1 <- round(seq(-limViab, limViab,0.01), digits=2)
color4viab <- setNames(myCol(length(scaleSeq1+1)), nm=scaleSeq1)
} else {
scaleSeq1 <- round(seq(0,1,0.01), digits=2)
color4viab <- setNames(myCol(length(scaleSeq1+1)), nm=scaleSeq1)
}
#change continues measurement to discrete measurement
matValue$Value <- factor(matValue$Value,levels = sort(unique(matValue$Value)))
#change order of heatmap
names(barValue) <- gsub("con.", "", names(barValue))
matValue$Var <- gsub("con.","",matValue$Var)
matValue$Var <- factor(matValue$Var, levels = names(barValue))
matValue$Sample <- factor(matValue$Sample, levels = names(seaValue))
#plot the heatmap
p1 <- ggplot(matValue, aes(x=Sample, y=Var)) + geom_tile(aes(fill=Value), color = "gray") +
theme_bw() + scale_y_discrete(expand=c(0,0)) + theme(axis.text.y=element_text(hjust=0, size=10), axis.text.x=element_blank(), axis.ticks=element_blank(), panel.border=element_rect(colour="gainsboro"), plot.title=element_text(size=12), panel.background=element_blank(), panel.grid.major=element_blank(), panel.grid.minor=element_blank()) + xlab("patients") + ylab("") + scale_fill_manual(name="Mutated", values=c(color4viab, `11`="gray96", `12`='black', `21`='lightgreen', `22`='green',`23` = 'green4'),guide=FALSE) + ggtitle(seaName)
#Plot the bar plot on the left of the heatmap
barDF = data.frame(barValue, nm=factor(names(barValue),levels=names(barValue)))
p2 <- ggplot(data=barDF, aes(x=nm, y=barValue)) +
geom_bar(stat="identity", fill="lightblue", colour="black", position = "identity", width=.66, size=0.2) +
theme_bw() + geom_hline(yintercept=0, size=0.3) + scale_x_discrete(expand=c(0,0.5)) +
scale_y_continuous(expand=c(0,0)) + coord_flip(ylim=c(min(barValue),max(barValue))) +
theme(panel.grid.major=element_blank(), panel.background=element_blank(), axis.ticks.y = element_blank(),
panel.grid.minor = element_blank(), axis.text=element_text(size=8), panel.border=element_blank()) +
xlab("") + ylab("") + geom_vline(xintercept=c(0.5), color="black", size=0.6)
#Plot the scatter plot under the heatmap
# scatterplot below
scatterDF = data.frame(X=factor(names(seaValue), levels=names(seaValue)), Y=seaValue)
p3 <- ggplot(scatterDF, aes(x=X, y=Y)) + geom_point(shape=21, fill="dimgrey", colour="black", size=1.2) + theme_bw() + theme(panel.grid.minor=element_blank(), panel.grid.major.x=element_blank(), axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.text.y=element_text(size=8), panel.border=element_rect(colour="dimgrey", size=0.1), panel.background=element_rect(fill="gray96"))
#Scale bar for continuous variable
Vgg = ggplot(data=data.frame(x=1, y=as.numeric(names(color4viab))), aes(x=x, y=y, color=y)) + geom_point() +
scale_color_gradientn(name="Z-score", colours =color4viab) + theme(legend.title=element_text(size=12), legend.text=element_text(size=10))
#Assemble all the plots togehter
# construct the gtable
wdths = c(1.5, 0.2, 1.3*ncol(matValue), 1.4, 1)
hghts = c(0.1, 0.0015*nrow(matValue), 0.16, 0.5)
gt = gtable(widths=unit(wdths, "in"), heights=unit(hghts, "in"))
## make grobs
gg1 = ggplotGrob(p1)
gg2 = ggplotGrob(p2)
gg3 = ggplotGrob(p3)
gg4 = ggplotGrob(Vgg)
## fill in the gtable
gt = gtable_add_grob(gt, gtable_filter(gg2, "panel"), 2, 1) # add barplot
gt = gtable_add_grob(gt, gtable_filter(gg1, "panel"), 2, 3) # add heatmap
gt = gtable_add_grob(gt, gtable_filter(gg1, "title"), 1, 3) #add title to plot
gt = gtable_add_grob(gt, gtable_filter(gg3, "panel"), 4, 3) # add scatterplot
gt = gtable_add_grob(gt, gtable_filter(gg2, "axis-b"), 3, 1) # y axis for barplot
gt = gtable_add_grob(gt, gtable_filter(gg3, "axis-l"), 4, 2) # y axis for scatter plot
gt = gtable_add_grob(gt, gtable_filter(gg1, "axis-l"), 2, 4) # variable names
gt = gtable_add_grob(gt, gtable_filter(gg4, "guide-box"), 2, 5) # scale bar for continous variables
#plot
#grid.draw(gt)
plotList[[seaName]] <- gt
}
return(plotList)
}
Plot all heatmaps
heatMaps <- lassoPlot(lassoResults, cleanData, freqCut = 0.8)
Arrange for the main figure (Figure 7)
title = ggdraw() + draw_figure_label("Figure 7", fontface = "bold", position = "top.left",size=20)
p <- ggdraw() +
draw_plot(varList[[1]], 0,0.6,0.25,0.4) +
draw_plot(varList[[4]], 0.25,0.6,0.25,0.4) +
draw_plot(varList[[6]], 0.5,0.6,0.25,0.4) +
draw_plot(varList[[7]], 0.75,0.6,0.25,0.4) +
draw_plot(heatMaps[[1]], 0, 0, 1, 0.3 ) +
draw_plot(heatMaps[[6]], 0, 0.3, 1, 0.3 ) +
draw_plot_label(c("A","B"), c(0,0), c(1, 0.6),size=20)
plot_grid(title, p, rel_heights = c(0.05,0.95), ncol = 1)
Plot other feature for supplementary file
allList <- list(varList[[2]], heatMaps[[2]],
varList[[4]], heatMaps[[4]],
varList[[5]], heatMaps[[5]],
varList[[7]], heatMaps[[7]],
varList[[8]], heatMaps[[8]],
varList[[9]], heatMaps[[9]],
varList[[11]], heatMaps[[11]])
plot_grid(plotlist = allList, rel_widths = c(1,4,1,4),ncol = 4)
Prepare signature sets
gmts = list(H=system.file("extdata","h.all.v5.1.symbols.gmt", package="BloodCancerMultiOmics2017"))
Enrichment using HALLMARK
plotPC <- colnames(pcLoad)[c(2,4, 6, 8,10,11)]
enHallmark <- lapply(plotPC, function(pc) {
inputTab <- data.frame(ID = rowData(dds[rownames(pcLoad),])$symbol,
stat = pcLoad[,pc]) %>%
arrange(abs(stat)) %>% distinct(ID, .keep_all = TRUE) %>%
column_to_rownames("ID")
res <- runGSEA(inputTab = inputTab, gmtFile = gmts$H, GSAmethod = "page")
res$Name <- gsub("HALLMARK_","", res$Name)
res
})
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
## Checking arguments...done!
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
names(enHallmark) <- sapply(plotPC,
function(x) paste("expression",x, collapse = ""))
plotHallmark1 <- jyluMisc::plotEnrichmentBar(enHallmark[1:3], pCut = 0.05, setName = "", ifFDR = TRUE)
plotHallmark2 <- jyluMisc::plotEnrichmentBar(enHallmark[4:6], pCut = 0.05, setName = "", ifFDR = TRUE)
#save to pdf manually
ggdraw() +
draw_plot(plotHallmark1, 0,0,0.5,1) +
draw_plot(plotHallmark2, 0.5,0.4,0.5,0.6)
sessionInfo()
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.4
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] bindrcpp_0.2.2 forcats_0.3.0
## [3] stringr_1.3.1 dplyr_0.7.5
## [5] purrr_0.2.4 readr_1.1.1
## [7] tidyr_0.8.1 tibble_1.4.2
## [9] tidyverse_1.2.1 gtable_0.2.0
## [11] reshape2_1.4.3 RColorBrewer_1.1-2
## [13] glmnet_2.0-16 foreach_1.4.4
## [15] Matrix_1.2-14 survminer_0.4.2
## [17] ggpubr_0.1.6 magrittr_1.5
## [19] maxstat_0.7-25 survival_2.42-3
## [21] DESeq2_1.20.0 robustbase_0.93-0
## [23] pheatmap_1.0.10 genefilter_1.62.0
## [25] gridExtra_2.3 piano_1.20.0
## [27] cowplot_0.9.2 xtable_1.8-2
## [29] ggbeeswarm_0.6.0 ggplot2_2.2.1
## [31] SummarizedExperiment_1.10.1 DelayedArray_0.6.0
## [33] BiocParallel_1.14.1 matrixStats_0.53.1
## [35] Biobase_2.40.0 GenomicRanges_1.32.3
## [37] GenomeInfoDb_1.16.0 IRanges_2.14.10
## [39] S4Vectors_0.18.2 BiocGenerics_0.26.0
## [41] BloodCancerMultiOmics2017_1.0.0 seahorseCLL_1.0.0
## [43] BiocStyle_2.8.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.1.0 backports_1.1.2 drc_3.0-1
## [4] jyluMisc_0.1.5 Hmisc_4.1-1 fastmatch_1.1-0
## [7] plyr_1.8.4 igraph_1.2.1 lazyeval_0.2.1
## [10] splines_3.5.0 TH.data_1.0-8 digest_0.6.15
## [13] htmltools_0.3.6 gdata_2.18.0 checkmate_1.8.5
## [16] memoise_1.1.0 cluster_2.0.7-1 openxlsx_4.0.17
## [19] limma_3.36.1 annotate_1.58.0 modelr_0.1.2
## [22] sandwich_2.4-0 colorspace_1.3-2 rvest_0.3.2
## [25] blob_1.1.1 haven_1.1.1 xfun_0.1
## [28] crayon_1.3.4 RCurl_1.95-4.10 jsonlite_1.5
## [31] bindr_0.1.1 zoo_1.8-1 iterators_1.0.9
## [34] glue_1.2.0 zlibbioc_1.26.0 XVector_0.20.0
## [37] car_3.0-0 DEoptimR_1.0-8 abind_1.4-5
## [40] scales_0.5.0.9000 mvtnorm_1.0-7 DBI_1.0.0
## [43] relations_0.6-8 Rcpp_0.12.17 plotrix_3.7-1
## [46] cmprsk_2.2-7 htmlTable_1.11.2 foreign_0.8-70
## [49] bit_1.1-13 km.ci_0.5-2 ipflasso_0.1
## [52] Formula_1.2-3 htmlwidgets_1.2 httr_1.3.1
## [55] fgsea_1.6.0 gplots_3.0.1 acepack_1.4.1
## [58] pkgconfig_2.0.1 XML_3.98-1.11 nnet_7.3-12
## [61] locfit_1.5-9.1 labeling_0.3 tidyselect_0.2.4
## [64] rlang_0.2.0.9001 AnnotationDbi_1.42.1 cellranger_1.1.0
## [67] munsell_0.4.3 tools_3.5.0 cli_1.0.0
## [70] RSQLite_2.1.1 devtools_1.13.5 broom_0.4.4
## [73] evaluate_0.10.1 ggdendro_0.1-20 yaml_2.1.19
## [76] knitr_1.20 bit64_0.9-7 survMisc_0.5.4
## [79] caTools_1.17.1 nlme_3.1-137 slam_0.1-43
## [82] xml2_1.2.0 compiler_3.5.0 rstudioapi_0.7
## [85] curl_3.2 beeswarm_0.2.3 marray_1.58.0
## [88] geneplotter_1.58.0 stringi_1.2.2 lattice_0.20-35
## [91] psych_1.8.4 KMsurv_0.1-5 pillar_1.2.2
## [94] data.table_1.11.2 bitops_1.0-6 R6_2.2.2
## [97] latticeExtra_0.6-28 bookdown_0.7 rio_0.5.10
## [100] KernSmooth_2.23-15 vipor_0.4.5 codetools_0.2-15
## [103] MASS_7.3-50 gtools_3.5.0 exactRankTests_0.8-29
## [106] assertthat_0.2.0 rprojroot_1.3-2 withr_2.1.2
## [109] mnormt_1.5-5 multcomp_1.4-8 GenomeInfoDbData_1.1.0
## [112] hms_0.4.2 rpart_4.1-13 rmarkdown_1.9
## [115] carData_3.0-1 sets_1.0-18 lubridate_1.7.4
## [118] base64enc_0.1-3